src/gromacs/simd/impl_reference/impl_reference_simd4_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
 * \brief Reference implementation, SIMD4 double precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
#include "config.h"

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>

#include <algorithm>
#include <array>

#include "impl_reference_definitions.h"
  59
  60 namespace gmx
  61 {
  62
  63 /*! \cond libapi */
  64 /*! \addtogroup module_simd */
  65 /*! \{ */
  66
  67 /*! \name Constant width-4 double precision SIMD types and instructions
  68  * \{
  69  */
  70
  71 /*! \libinternal \brief SIMD4 double type.
  72  *
  73  * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
  74  *
  75  * \note This variable cannot be placed inside other structures or classes, since
  76  *       some compilers (including at least clang-3.7) appear to lose the
  77  *       alignment. This is likely particularly severe when allocating such
  78  *       memory on the heap, but it occurs for stack structures too.
  79  */
  80 class Simd4Double
  81 {
  82     public:
  83         Simd4Double() {}
  84
  85         //! \brief Construct from scalar
  86         Simd4Double(double d) { simdInternal_.fill(d); }
  87
  88         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  89          *
  90          * This has to be public to enable usage in combination with static inline
  91          * functions, but it should never, EVER, be accessed by any code outside
  92          * the corresponding implementation directory since the type will depend
  93          * on the architecture.
  94          */
  95         std::array<double, GMX_SIMD4_WIDTH>  simdInternal_;
  96 };
  97
  98 /*! \libinternal  \brief SIMD4 variable type to use for logical comparisons on doubles.
  99  *
 100  * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
 101  *
 102  * \note This variable cannot be placed inside other structures or classes, since
 103  *       some compilers (including at least clang-3.7) appear to lose the
 104  *       alignment. This is likely particularly severe when allocating such
 105  *       memory on the heap, but it occurs for stack structures too.
 106  */
 107 class Simd4DBool
 108 {
 109     public:
 110         Simd4DBool() {}
 111
 112         //! \brief Construct from scalar
 113         Simd4DBool(bool b) { simdInternal_.fill(b); }
 114
 115         /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 116          *
 117          * This has to be public to enable usage in combination with static inline
 118          * functions, but it should never, EVER, be accessed by any code outside
 119          * the corresponding implementation directory since the type will depend
 120          * on the architecture.
 121          */
 122         std::array<bool, GMX_SIMD4_WIDTH>  simdInternal_;
 123 };
 124
 125 /*! \brief Load 4 double values from aligned memory into SIMD4 variable.
 126  *
 127  * \param m Pointer to memory aligned to 4 elements.
 128  * \return SIMD4 variable with data loaded.
 129  */
 130 static inline Simd4Double gmx_simdcall
 131 load4(const double *m)
 132 {
 133     Simd4Double a;
 134
 135     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 136
 137     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 138     return a;
 139 }
 140
 141 /*! \brief Store the contents of SIMD4 double to aligned memory m.
 142  *
 143  * \param[out] m Pointer to memory, aligned to 4 elements.
 144  * \param a SIMD4 variable to store
 145  */
 146 static inline void gmx_simdcall
 147 store4(double *m, Simd4Double a)
 148 {
 149     assert(std::size_t(m) % (a.simdInternal_.size()*sizeof(double)) == 0);
 150
 151     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 152 }
 153
 154 /*! \brief Load SIMD4 double from unaligned memory.
 155  *
 156  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 157  *
 158  * \param m Pointer to memory, no alignment requirement.
 159  * \return SIMD4 variable with data loaded.
 160  */
 161 static inline Simd4Double gmx_simdcall
 162 load4U(const double *m)
 163 {
 164     Simd4Double a;
 165     std::copy(m, m+a.simdInternal_.size(), a.simdInternal_.begin());
 166     return a;
 167 }
 168
 169 /*! \brief Store SIMD4 double to unaligned memory.
 170  *
 171  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 172  *
 173  * \param[out] m Pointer to memory, no alignment requirement.
 174  * \param a SIMD4 variable to store.
 175  */
 176 static inline void gmx_simdcall
 177 store4U(double *m, Simd4Double a)
 178 {
 179     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 180 }
 181
 182 /*! \brief Set all SIMD4 double elements to 0.
 183  *
 184  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 185  * internally to handle all types rather than adding the suffix used here.
 186  *
 187  * \return SIMD4 0.0
 188  */
 189 static inline Simd4Double gmx_simdcall
 190 simd4SetZeroD()
 191 {
 192     return Simd4Double(0.0);
 193 }
 194
 195
 196 /*! \brief Bitwise and for two SIMD4 double variables.
 197  *
 198  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 199  *
 200  * \param a data1
 201  * \param b data2
 202  * \return data1 & data2
 203  */
 204 static inline Simd4Double gmx_simdcall
 205 operator&(Simd4Double a, Simd4Double b)
 206 {
 207     Simd4Double         res;
 208
 209     union
 210     {
 211         double        r;
 212         std::int64_t  i;
 213     }
 214     conv1, conv2;
 215
 216     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 217     {
 218         conv1.r              = a.simdInternal_[i];
 219         conv2.r              = b.simdInternal_[i];
 220         conv1.i              = conv1.i & conv2.i;
 221         res.simdInternal_[i] = conv1.r;
 222     }
 223     return res;
 224 }
 225
 226
 227 /*! \brief Bitwise andnot for two SIMD4 double variables. c=(~a) & b.
 228  *
 229  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 230  *
 231  * \param a data1
 232  * \param b data2
 233  * \return (~data1) & data2
 234  */
 235 static inline Simd4Double gmx_simdcall
 236 andNot(Simd4Double a, Simd4Double b)
 237 {
 238     Simd4Double         res;
 239
 240     union
 241     {
 242         double        r;
 243         std::int64_t  i;
 244     }
 245     conv1, conv2;
 246
 247     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 248     {
 249         conv1.r              = a.simdInternal_[i];
 250         conv2.r              = b.simdInternal_[i];
 251         conv1.i              = ~conv1.i & conv2.i;
 252         res.simdInternal_[i] = conv1.r;
 253     }
 254     return res;
 255 }
 256
 257
 258 /*! \brief Bitwise or for two SIMD4 doubles.
 259  *
 260  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 261  *
 262  * \param a data1
 263  * \param b data2
 264  * \return data1 | data2
 265  */
 266 static inline Simd4Double gmx_simdcall
 267 operator|(Simd4Double a, Simd4Double b)
 268 {
 269     Simd4Double         res;
 270
 271     union
 272     {
 273         double        r;
 274         std::int64_t  i;
 275     }
 276     conv1, conv2;
 277
 278     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 279     {
 280         conv1.r              = a.simdInternal_[i];
 281         conv2.r              = b.simdInternal_[i];
 282         conv1.i              = conv1.i | conv2.i;
 283         res.simdInternal_[i] = conv1.r;
 284     }
 285     return res;
 286 }
 287
 288 /*! \brief Bitwise xor for two SIMD4 double variables.
 289  *
 290  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 291  *
 292  * \param a data1
 293  * \param b data2
 294  * \return data1 ^ data2
 295  */
 296 static inline Simd4Double gmx_simdcall
 297 operator^(Simd4Double a, Simd4Double b)
 298 {
 299     Simd4Double         res;
 300
 301     union
 302     {
 303         double        r;
 304         std::int64_t  i;
 305     }
 306     conv1, conv2;
 307
 308     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 309     {
 310         conv1.r              = a.simdInternal_[i];
 311         conv2.r              = b.simdInternal_[i];
 312         conv1.i              = conv1.i ^ conv2.i;
 313         res.simdInternal_[i] = conv1.r;
 314     }
 315     return res;
 316 }
 317
 318 /*! \brief Add two double SIMD4 variables.
 319  *
 320  * \param a term1
 321  * \param b term2
 322  * \return a+b
 323  */
 324 static inline Simd4Double gmx_simdcall
 325 operator+(Simd4Double a, Simd4Double b)
 326 {
 327     Simd4Double         res;
 328
 329     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 330     {
 331         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 332     }
 333     return res;
 334 }
 335
 336 /*! \brief Subtract two SIMD4 variables.
 337  *
 338  * \param a term1
 339  * \param b term2
 340  * \return a-b
 341  */
 342 static inline Simd4Double gmx_simdcall
 343 operator-(Simd4Double a, Simd4Double b)
 344 {
 345     Simd4Double         res;
 346
 347     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 348     {
 349         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 350     }
 351     return res;
 352 }
 353
 354 /*! \brief SIMD4 floating-point negate.
 355  *
 356  * \param a SIMD4 floating-point value
 357  * \return -a
 358  */
 359 static inline Simd4Double gmx_simdcall
 360 operator-(Simd4Double a)
 361 {
 362     Simd4Double         res;
 363
 364     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 365     {
 366         res.simdInternal_[i] = -a.simdInternal_[i];
 367     }
 368     return res;
 369 }
 370
 371 /*! \brief Multiply two SIMD4 variables.
 372  *
 373  * \param a factor1
 374  * \param b factor2
 375  * \return a*b.
 376  */
 377 static inline Simd4Double gmx_simdcall
 378 operator*(Simd4Double a, Simd4Double b)
 379 {
 380     Simd4Double         res;
 381
 382     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 383     {
 384         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 385     }
 386     return res;
 387 }
 388
 389 /*! \brief SIMD4 Fused-multiply-add. Result is a*b+c.
 390  *
 391  * \param a factor1
 392  * \param b factor2
 393  * \param c term
 394  * \return a*b+c
 395  */
 396 static inline Simd4Double gmx_simdcall
 397 fma(Simd4Double a, Simd4Double b, Simd4Double c)
 398 {
 399     return a*b+c;
 400 }
 401
 402 /*! \brief SIMD4 Fused-multiply-subtract. Result is a*b-c.
 403  *
 404  * \param a factor1
 405  * \param b factor2
 406  * \param c term
 407  * \return a*b-c
 408  */
 409 static inline Simd4Double gmx_simdcall
 410 fms(Simd4Double a, Simd4Double b, Simd4Double c)
 411 {
 412     return a*b-c;
 413 }
 414
 415 /*! \brief SIMD4 Fused-negated-multiply-add. Result is -a*b+c.
 416  *
 417  * \param a factor1
 418  * \param b factor2
 419  * \param c term
 420  * \return -a*b+c
 421  */
 422 static inline Simd4Double gmx_simdcall
 423 fnma(Simd4Double a, Simd4Double b, Simd4Double c)
 424 {
 425     return c-a*b;
 426 }
 427
 428 /*! \brief SIMD4 Fused-negated-multiply-subtract. Result is -a*b-c.
 429  *
 430  * \param a factor1
 431  * \param b factor2
 432  * \param c term
 433  * \return -a*b-c
 434  */
 435 static inline Simd4Double gmx_simdcall
 436 fnms(Simd4Double a, Simd4Double b, Simd4Double c)
 437 {
 438     return -a*b-c;
 439 }
 440
 441 /*! \brief SIMD4 1.0/sqrt(x) lookup.
 442  *
 443  * This is a low-level instruction that should only be called from routines
 444  * implementing the inverse square root in simd_math.h.
 445  *
 446  * \param x Argument, x>0
 447  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 448  */
 449 static inline Simd4Double gmx_simdcall
 450 rsqrt(Simd4Double x)
 451 {
 452     Simd4Double         res;
 453
 454     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 455     {
 456         // sic - we only use single precision for the lookup
 457         res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 458     }
 459     return res;
 460 };
 461
 462
 463 /*! \brief SIMD4 Floating-point abs().
 464  *
 465  * \param a any floating point values
 466  * \return fabs(a) for each element.
 467  */
 468 static inline Simd4Double gmx_simdcall
 469 abs(Simd4Double a)
 470 {
 471     Simd4Double         res;
 472
 473     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 474     {
 475         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 476     }
 477     return res;
 478 }
 479
 480 /*! \brief Set each SIMD4 element to the largest from two variables.
 481  *
 482  * \param a Any floating-point value
 483  * \param b Any floating-point value
 484  * \return max(a,b) for each element.
 485  */
 486 static inline Simd4Double gmx_simdcall
 487 max(Simd4Double a, Simd4Double b)
 488 {
 489     Simd4Double         res;
 490
 491     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 492     {
 493         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 494     }
 495     return res;
 496 }
 497
 498
 499 /*! \brief Set each SIMD4 element to the largest from two variables.
 500  *
 501  * \param a Any floating-point value
 502  * \param b Any floating-point value
 503  * \return max(a,b) for each element.
 504  */
 505 static inline Simd4Double gmx_simdcall
 506 min(Simd4Double a, Simd4Double b)
 507 {
 508     Simd4Double         res;
 509
 510     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 511     {
 512         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 513     }
 514     return res;
 515 }
 516
 517
 518 /*! \brief SIMD4 Round to nearest integer value (in floating-point format).
 519  *
 520  * \param a Any floating-point value
 521  * \return The nearest integer, represented in floating-point format.
 522  */
 523 static inline Simd4Double gmx_simdcall
 524 round(Simd4Double a)
 525 {
 526     Simd4Double         res;
 527
 528     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 529     {
 530         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 531     }
 532     return res;
 533 }
 534
 535
 536 /*! \brief Truncate SIMD4, i.e. round towards zero - common hardware instruction.
 537  *
 538  * \param a Any floating-point value
 539  * \return Integer rounded towards zero, represented in floating-point format.
 540  *
 541  * \note This is truncation towards zero, not floor(). The reason for this
 542  * is that truncation is virtually always present as a dedicated hardware
 543  * instruction, but floor() frequently isn't.
 544  */
 545 static inline Simd4Double gmx_simdcall
 546 trunc(Simd4Double a)
 547 {
 548     Simd4Double         res;
 549
 550     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 551     {
 552         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 553     }
 554     return res;
 555 }
 556
 557 /*! \brief Return dot product of two double precision SIMD4 variables.
 558  *
 559  * The dot product is calculated between the first three elements in the two
 560  * vectors, while the fourth is ignored. The result is returned as a scalar.
 561  *
 562  * \param a vector1
 563  * \param b vector2
 564  * \result a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.
 565  */
 566 static inline double gmx_simdcall
 567 dotProduct(Simd4Double a, Simd4Double b)
 568 {
 569     return
 570         (a.simdInternal_[0] * b.simdInternal_[0] +
 571          a.simdInternal_[1] * b.simdInternal_[1] +
 572          a.simdInternal_[2] * b.simdInternal_[2]);
 573 }
 574
 575 /*! \brief SIMD4 double transpose
 576  *
 577  * \param[in,out] v0  Row 0 on input, column 0 on output
 578  * \param[in,out] v1  Row 1 on input, column 1 on output
 579  * \param[in,out] v2  Row 2 on input, column 2 on output
 580  * \param[in,out] v3  Row 3 on input, column 3 on output
 581  */
 582 static inline void gmx_simdcall
 583 transpose(Simd4Double * v0, Simd4Double * v1,
 584           Simd4Double * v2, Simd4Double * v3)
 585 {
 586     Simd4Double t0 = *v0;
 587     Simd4Double t1 = *v1;
 588     Simd4Double t2 = *v2;
 589     Simd4Double t3 = *v3;
 590     v0->simdInternal_[0] = t0.simdInternal_[0];
 591     v0->simdInternal_[1] = t1.simdInternal_[0];
 592     v0->simdInternal_[2] = t2.simdInternal_[0];
 593     v0->simdInternal_[3] = t3.simdInternal_[0];
 594     v1->simdInternal_[0] = t0.simdInternal_[1];
 595     v1->simdInternal_[1] = t1.simdInternal_[1];
 596     v1->simdInternal_[2] = t2.simdInternal_[1];
 597     v1->simdInternal_[3] = t3.simdInternal_[1];
 598     v2->simdInternal_[0] = t0.simdInternal_[2];
 599     v2->simdInternal_[1] = t1.simdInternal_[2];
 600     v2->simdInternal_[2] = t2.simdInternal_[2];
 601     v2->simdInternal_[3] = t3.simdInternal_[2];
 602     v3->simdInternal_[0] = t0.simdInternal_[3];
 603     v3->simdInternal_[1] = t1.simdInternal_[3];
 604     v3->simdInternal_[2] = t2.simdInternal_[3];
 605     v3->simdInternal_[3] = t3.simdInternal_[3];
 606 }
 607
 608 /*! \brief a==b for SIMD4 double
 609  *
 610  * \param a value1
 611  * \param b value2
 612  * \return Each element of the boolean will be set to true if a==b.
 613  */
 614 static inline Simd4DBool gmx_simdcall
 615 operator==(Simd4Double a, Simd4Double b)
 616 {
 617     Simd4DBool         res;
 618
 619     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 620     {
 621         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 622     }
 623     return res;
 624 }
 625
 626 /*! \brief a!=b for SIMD4 double
 627  *
 628  * \param a value1
 629  * \param b value2
 630  * \return Each element of the boolean will be set to true if a!=b.
 631  */
 632 static inline Simd4DBool gmx_simdcall
 633 operator!=(Simd4Double a, Simd4Double b)
 634 {
 635     Simd4DBool         res;
 636
 637     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 638     {
 639         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 640     }
 641     return res;
 642 }
 643
 644 /*! \brief a<b for SIMD4 double
 645  *
 646  * \param a value1
 647  * \param b value2
 648  * \return Each element of the boolean will be set to true if a<b.
 649  */
 650 static inline Simd4DBool gmx_simdcall
 651 operator<(Simd4Double a, Simd4Double b)
 652 {
 653     Simd4DBool          res;
 654
 655     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 656     {
 657         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 658     }
 659     return res;
 660 }
 661
 662
 663 /*! \brief a<=b for SIMD4 double.
 664  *
 665  * \param a value1
 666  * \param b value2
 667  * \return Each element of the boolean will be set to true if a<=b.
 668  */
 669 static inline Simd4DBool gmx_simdcall
 670 operator<=(Simd4Double a, Simd4Double b)
 671 {
 672     Simd4DBool          res;
 673
 674     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 675     {
 676         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 677     }
 678     return res;
 679 }
 680
 681 /*! \brief Logical \a and on single precision SIMD4 booleans.
 682  *
 683  * \param a logical vars 1
 684  * \param b logical vars 2
 685  * \return For each element, the result boolean is true if a \& b are true.
 686  *
 687  * \note This is not necessarily a bitwise operation - the storage format
 688  * of booleans is implementation-dependent.
 689  */
 690 static inline Simd4DBool gmx_simdcall
 691 operator&&(Simd4DBool a, Simd4DBool b)
 692 {
 693     Simd4DBool         res;
 694
 695     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 696     {
 697         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
 698     }
 699     return res;
 700 }
 701
 702 /*! \brief Logical \a or on single precision SIMD4 booleans.
 703  *
 704  * \param a logical vars 1
 705  * \param b logical vars 2
 706  * \return For each element, the result boolean is true if a or b is true.
 707  *
 708  * Note that this is not necessarily a bitwise operation - the storage format
 709  * of booleans is implementation-dependent.
 710  */
 711 static inline Simd4DBool gmx_simdcall
 712 operator||(Simd4DBool a, Simd4DBool b)
 713 {
 714     Simd4DBool         res;
 715
 716     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 717     {
 718         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
 719     }
 720     return res;
 721 }
 722
 723 /*! \brief Returns non-zero if any of the boolean in SIMD4 a is True, otherwise 0.
 724  *
 725  * \param a Logical variable.
 726  * \return true if any element in a is true, otherwise false.
 727  *
 728  * The actual return value for truth will depend on the architecture,
 729  * so any non-zero value is considered truth.
 730  */
 731 static inline bool gmx_simdcall
 732 anyTrue(Simd4DBool a)
 733 {
 734     bool res = false;
 735
 736     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 737     {
 738         res = res || a.simdInternal_[i];
 739     }
 740     return res;
 741 }
 742
 743 /*! \brief Select from single precision SIMD4 variable where boolean is true.
 744  *
 745  * \param a Floating-point variable to select from
 746  * \param mask Boolean selector
 747  * \return  For each element, a is selected for true, 0 for false.
 748  */
 749 static inline Simd4Double gmx_simdcall
 750 selectByMask(Simd4Double a, Simd4DBool mask)
 751 {
 752     Simd4Double          res;
 753
 754     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 755     {
 756         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
 757     }
 758     return res;
 759 }
 760
 761 /*! \brief Select from single precision SIMD4 variable where boolean is false.
 762  *
 763  * \param a Floating-point variable to select from
 764  * \param mask Boolean selector
 765  * \return  For each element, a is selected for false, 0 for true (sic).
 766  */
 767 static inline Simd4Double gmx_simdcall
 768 selectByNotMask(Simd4Double a, Simd4DBool mask)
 769 {
 770     Simd4Double          res;
 771
 772     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 773     {
 774         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
 775     }
 776     return res;
 777 }
 778
 779
 780 /*! \brief Vector-blend SIMD4 selection.
 781  *
 782  * \param a First source
 783  * \param b Second source
 784  * \param sel Boolean selector
 785  * \return For each element, select b if sel is true, a otherwise.
 786  */
 787 static inline Simd4Double gmx_simdcall
 788 blend(Simd4Double a, Simd4Double b, Simd4DBool sel)
 789 {
 790     Simd4Double         res;
 791
 792     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 793     {
 794         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
 795     }
 796     return res;
 797 }
 798
 799
 800 /*! \brief Return sum of all elements in SIMD4 double variable.
 801  *
 802  * \param a SIMD4 variable to reduce/sum.
 803  * \return The sum of all elements in the argument variable.
 804  *
 805  */
 806 static inline double gmx_simdcall
 807 reduce(Simd4Double a)
 808 {
 809     double sum = 0.0;
 810
 811     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 812     {
 813         sum += a.simdInternal_[i];
 814     }
 815     return sum;
 816 }
 817
 818 //! \}
 819
 820 //! \}
 821
 822 //! \endcond
 823
 824 }      // namespace gmx
 825
 826 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H