src/gromacs/simd/impl_reference/impl_reference_simd4_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2019,2021, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
  37 #define GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H
  38
  39 /*! \libinternal \file
  40  *
  41  * \brief Reference implementation, SIMD4 single precision.
  42  *
  43  * \author Erik Lindahl <erik.lindahl@scilifelab.se>
  44  *
  45  * \ingroup module_simd
  46  */
  47
  48 #include "config.h"
  49
  50 #include <cassert>
  51 #include <cmath>
  52 #include <cstddef>
  53 #include <cstdint>
  54
  55 #include <algorithm>
  56 #include <array>
  57
  58 #include "impl_reference_definitions.h"
  59
  60 namespace gmx
  61 {
  62
  63 /*! \cond libapi */
  64 /*! \addtogroup module_simd */
  65 /*! \{ */
  66
  67 /*! \name Constant width-4 double precision SIMD types and instructions
  68  * \{
  69  */
  70
  71 /*! \libinternal \brief SIMD4 double type.
  72  *
  73  * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
  74  *
  75  * \note This variable cannot be placed inside other structures or classes, since
  76  *       some compilers (including at least clang-3.7) appear to lose the
  77  *       alignment. This is likely particularly severe when allocating such
  78  *       memory on the heap, but it occurs for stack structures too.
  79  */
  80 class Simd4Double
  81 {
  82 public:
  83     Simd4Double() {}
  84
  85     //! \brief Construct from scalar
  86     Simd4Double(double d) { simdInternal_.fill(d); }
  87
  88     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
  89      *
  90      * This has to be public to enable usage in combination with static inline
  91      * functions, but it should never, EVER, be accessed by any code outside
  92      * the corresponding implementation directory since the type will depend
  93      * on the architecture.
  94      */
  95     std::array<double, GMX_SIMD4_WIDTH> simdInternal_;
  96 };
  97
  98 /*! \libinternal  \brief SIMD4 variable type to use for logical comparisons on doubles.
  99  *
 100  * Available if \ref GMX_SIMD4_HAVE_DOUBLE is 1.
 101  *
 102  * \note This variable cannot be placed inside other structures or classes, since
 103  *       some compilers (including at least clang-3.7) appear to lose the
 104  *       alignment. This is likely particularly severe when allocating such
 105  *       memory on the heap, but it occurs for stack structures too.
 106  */
 107 class Simd4DBool
 108 {
 109 public:
 110     Simd4DBool() {}
 111
 112     //! \brief Construct from scalar
 113     Simd4DBool(bool b) { simdInternal_.fill(b); }
 114
 115     /*! \brief Internal SIMD data. Implementation dependent, don't touch.
 116      *
 117      * This has to be public to enable usage in combination with static inline
 118      * functions, but it should never, EVER, be accessed by any code outside
 119      * the corresponding implementation directory since the type will depend
 120      * on the architecture.
 121      */
 122     std::array<bool, GMX_SIMD4_WIDTH> simdInternal_;
 123 };
 124
 125 /*! \brief Load 4 double values from aligned memory into SIMD4 variable.
 126  *
 127  * \param m Pointer to memory aligned to 4 elements.
 128  * \return SIMD4 variable with data loaded.
 129  */
 130 static inline Simd4Double gmx_simdcall load4(const double* m)
 131 {
 132     Simd4Double a;
 133
 134     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 135
 136     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 137     return a;
 138 }
 139
 140 /*! \brief Store the contents of SIMD4 double to aligned memory m.
 141  *
 142  * \param[out] m Pointer to memory, aligned to 4 elements.
 143  * \param a SIMD4 variable to store
 144  */
 145 static inline void gmx_simdcall store4(double* m, Simd4Double a)
 146 {
 147     assert(std::size_t(m) % (a.simdInternal_.size() * sizeof(double)) == 0);
 148
 149     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 150 }
 151
 152 /*! \brief Load SIMD4 double from unaligned memory.
 153  *
 154  * Available if \ref GMX_SIMD_HAVE_LOADU is 1.
 155  *
 156  * \param m Pointer to memory, no alignment requirement.
 157  * \return SIMD4 variable with data loaded.
 158  */
 159 static inline Simd4Double gmx_simdcall load4U(const double* m)
 160 {
 161     Simd4Double a;
 162     std::copy(m, m + a.simdInternal_.size(), a.simdInternal_.begin());
 163     return a;
 164 }
 165
 166 /*! \brief Store SIMD4 double to unaligned memory.
 167  *
 168  * Available if \ref GMX_SIMD_HAVE_STOREU is 1.
 169  *
 170  * \param[out] m Pointer to memory, no alignment requirement.
 171  * \param a SIMD4 variable to store.
 172  */
 173 static inline void gmx_simdcall store4U(double* m, Simd4Double a)
 174 {
 175     std::copy(a.simdInternal_.begin(), a.simdInternal_.end(), m);
 176 }
 177
 178 /*! \brief Set all SIMD4 double elements to 0.
 179  *
 180  * You should typically just call \ref gmx::setZero(), which uses proxy objects
 181  * internally to handle all types rather than adding the suffix used here.
 182  *
 183  * \return SIMD4 0.0
 184  */
 185 static inline Simd4Double gmx_simdcall simd4SetZeroD()
 186 {
 187     return Simd4Double(0.0);
 188 }
 189
 190
 191 /*! \brief Bitwise and for two SIMD4 double variables.
 192  *
 193  * Supported if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 194  *
 195  * \param a data1
 196  * \param b data2
 197  * \return data1 & data2
 198  */
 199 static inline Simd4Double gmx_simdcall operator&(Simd4Double a, Simd4Double b)
 200 {
 201     Simd4Double res;
 202
 203     union
 204     {
 205         double       r;
 206         std::int64_t i;
 207     } conv1, conv2;
 208
 209     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 210     {
 211         conv1.r              = a.simdInternal_[i];
 212         conv2.r              = b.simdInternal_[i];
 213         conv1.i              = conv1.i & conv2.i;
 214         res.simdInternal_[i] = conv1.r;
 215     }
 216     return res;
 217 }
 218
 219
 220 /*! \brief Bitwise andnot for two SIMD4 double variables. c=(~a) & b.
 221  *
 222  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 223  *
 224  * \param a data1
 225  * \param b data2
 226  * \return (~data1) & data2
 227  */
 228 static inline Simd4Double gmx_simdcall andNot(Simd4Double a, Simd4Double b)
 229 {
 230     Simd4Double res;
 231
 232     union
 233     {
 234         double       r;
 235         std::int64_t i;
 236     } conv1, conv2;
 237
 238     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 239     {
 240         conv1.r              = a.simdInternal_[i];
 241         conv2.r              = b.simdInternal_[i];
 242         conv1.i              = ~conv1.i & conv2.i;
 243         res.simdInternal_[i] = conv1.r;
 244     }
 245     return res;
 246 }
 247
 248
 249 /*! \brief Bitwise or for two SIMD4 doubles.
 250  *
 251  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 252  *
 253  * \param a data1
 254  * \param b data2
 255  * \return data1 | data2
 256  */
 257 static inline Simd4Double gmx_simdcall operator|(Simd4Double a, Simd4Double b)
 258 {
 259     Simd4Double res;
 260
 261     union
 262     {
 263         double       r;
 264         std::int64_t i;
 265     } conv1, conv2;
 266
 267     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 268     {
 269         conv1.r              = a.simdInternal_[i];
 270         conv2.r              = b.simdInternal_[i];
 271         conv1.i              = conv1.i | conv2.i;
 272         res.simdInternal_[i] = conv1.r;
 273     }
 274     return res;
 275 }
 276
 277 /*! \brief Bitwise xor for two SIMD4 double variables.
 278  *
 279  * Available if \ref GMX_SIMD_HAVE_LOGICAL is 1.
 280  *
 281  * \param a data1
 282  * \param b data2
 283  * \return data1 ^ data2
 284  */
 285 static inline Simd4Double gmx_simdcall operator^(Simd4Double a, Simd4Double b)
 286 {
 287     Simd4Double res;
 288
 289     union
 290     {
 291         double       r;
 292         std::int64_t i;
 293     } conv1, conv2;
 294
 295     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 296     {
 297         conv1.r              = a.simdInternal_[i];
 298         conv2.r              = b.simdInternal_[i];
 299         conv1.i              = conv1.i ^ conv2.i;
 300         res.simdInternal_[i] = conv1.r;
 301     }
 302     return res;
 303 }
 304
 305 /*! \brief Add two double SIMD4 variables.
 306  *
 307  * \param a term1
 308  * \param b term2
 309  * \return a+b
 310  */
 311 static inline Simd4Double gmx_simdcall operator+(Simd4Double a, Simd4Double b)
 312 {
 313     Simd4Double res;
 314
 315     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 316     {
 317         res.simdInternal_[i] = a.simdInternal_[i] + b.simdInternal_[i];
 318     }
 319     return res;
 320 }
 321
 322 /*! \brief Subtract two SIMD4 variables.
 323  *
 324  * \param a term1
 325  * \param b term2
 326  * \return a-b
 327  */
 328 static inline Simd4Double gmx_simdcall operator-(Simd4Double a, Simd4Double b)
 329 {
 330     Simd4Double res;
 331
 332     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 333     {
 334         res.simdInternal_[i] = a.simdInternal_[i] - b.simdInternal_[i];
 335     }
 336     return res;
 337 }
 338
 339 /*! \brief SIMD4 floating-point negate.
 340  *
 341  * \param a SIMD4 floating-point value
 342  * \return -a
 343  */
 344 static inline Simd4Double gmx_simdcall operator-(Simd4Double a)
 345 {
 346     Simd4Double res;
 347
 348     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 349     {
 350         res.simdInternal_[i] = -a.simdInternal_[i];
 351     }
 352     return res;
 353 }
 354
 355 /*! \brief Multiply two SIMD4 variables.
 356  *
 357  * \param a factor1
 358  * \param b factor2
 359  * \return a*b.
 360  */
 361 static inline Simd4Double gmx_simdcall operator*(Simd4Double a, Simd4Double b)
 362 {
 363     Simd4Double res;
 364
 365     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 366     {
 367         res.simdInternal_[i] = a.simdInternal_[i] * b.simdInternal_[i];
 368     }
 369     return res;
 370 }
 371
 372 /*! \brief SIMD4 Fused-multiply-add. Result is a*b+c.
 373  *
 374  * \param a factor1
 375  * \param b factor2
 376  * \param c term
 377  * \return a*b+c
 378  */
 379 static inline Simd4Double gmx_simdcall fma(Simd4Double a, Simd4Double b, Simd4Double c)
 380 {
 381     return a * b + c;
 382 }
 383
 384 /*! \brief SIMD4 Fused-multiply-subtract. Result is a*b-c.
 385  *
 386  * \param a factor1
 387  * \param b factor2
 388  * \param c term
 389  * \return a*b-c
 390  */
 391 static inline Simd4Double gmx_simdcall fms(Simd4Double a, Simd4Double b, Simd4Double c)
 392 {
 393     return a * b - c;
 394 }
 395
 396 /*! \brief SIMD4 Fused-negated-multiply-add. Result is -a*b+c.
 397  *
 398  * \param a factor1
 399  * \param b factor2
 400  * \param c term
 401  * \return -a*b+c
 402  */
 403 static inline Simd4Double gmx_simdcall fnma(Simd4Double a, Simd4Double b, Simd4Double c)
 404 {
 405     return c - a * b;
 406 }
 407
 408 /*! \brief SIMD4 Fused-negated-multiply-subtract. Result is -a*b-c.
 409  *
 410  * \param a factor1
 411  * \param b factor2
 412  * \param c term
 413  * \return -a*b-c
 414  */
 415 static inline Simd4Double gmx_simdcall fnms(Simd4Double a, Simd4Double b, Simd4Double c)
 416 {
 417     return -a * b - c;
 418 }
 419
 420 /*! \brief SIMD4 1.0/sqrt(x) lookup.
 421  *
 422  * This is a low-level instruction that should only be called from routines
 423  * implementing the inverse square root in simd_math.h.
 424  *
 425  * \param x Argument, x>0
 426  * \return Approximation of 1/sqrt(x), accuracy is \ref GMX_SIMD_RSQRT_BITS.
 427  */
 428 static inline Simd4Double gmx_simdcall rsqrt(Simd4Double x)
 429 {
 430     Simd4Double res;
 431
 432     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 433     {
 434         // sic - we only use single precision for the lookup
 435         res.simdInternal_[i] = 1.0F / std::sqrt(static_cast<float>(x.simdInternal_[i]));
 436     }
 437     return res;
 438 };
 439
 440
 441 /*! \brief SIMD4 Floating-point abs().
 442  *
 443  * \param a any floating point values
 444  * \return fabs(a) for each element.
 445  */
 446 static inline Simd4Double gmx_simdcall abs(Simd4Double a)
 447 {
 448     Simd4Double res;
 449
 450     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 451     {
 452         res.simdInternal_[i] = std::abs(a.simdInternal_[i]);
 453     }
 454     return res;
 455 }
 456
 457 /*! \brief Set each SIMD4 element to the largest from two variables.
 458  *
 459  * \param a Any floating-point value
 460  * \param b Any floating-point value
 461  * \return max(a,b) for each element.
 462  */
 463 static inline Simd4Double gmx_simdcall max(Simd4Double a, Simd4Double b)
 464 {
 465     Simd4Double res;
 466
 467     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 468     {
 469         res.simdInternal_[i] = std::max(a.simdInternal_[i], b.simdInternal_[i]);
 470     }
 471     return res;
 472 }
 473
 474
 475 /*! \brief Set each SIMD4 element to the largest from two variables.
 476  *
 477  * \param a Any floating-point value
 478  * \param b Any floating-point value
 479  * \return max(a,b) for each element.
 480  */
 481 static inline Simd4Double gmx_simdcall min(Simd4Double a, Simd4Double b)
 482 {
 483     Simd4Double res;
 484
 485     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 486     {
 487         res.simdInternal_[i] = std::min(a.simdInternal_[i], b.simdInternal_[i]);
 488     }
 489     return res;
 490 }
 491
 492
 493 /*! \brief SIMD4 Round to nearest integer value (in floating-point format).
 494  *
 495  * \param a Any floating-point value
 496  * \return The nearest integer, represented in floating-point format.
 497  */
 498 static inline Simd4Double gmx_simdcall round(Simd4Double a)
 499 {
 500     Simd4Double res;
 501
 502     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 503     {
 504         res.simdInternal_[i] = std::round(a.simdInternal_[i]);
 505     }
 506     return res;
 507 }
 508
 509
 510 /*! \brief Truncate SIMD4, i.e. round towards zero - common hardware instruction.
 511  *
 512  * \param a Any floating-point value
 513  * \return Integer rounded towards zero, represented in floating-point format.
 514  *
 515  * \note This is truncation towards zero, not floor(). The reason for this
 516  * is that truncation is virtually always present as a dedicated hardware
 517  * instruction, but floor() frequently isn't.
 518  */
 519 static inline Simd4Double gmx_simdcall trunc(Simd4Double a)
 520 {
 521     Simd4Double res;
 522
 523     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 524     {
 525         res.simdInternal_[i] = std::trunc(a.simdInternal_[i]);
 526     }
 527     return res;
 528 }
 529
 530 /*! \brief Return dot product of two double precision SIMD4 variables.
 531  *
 532  * The dot product is calculated between the first three elements in the two
 533  * vectors, while the fourth is ignored. The result is returned as a scalar.
 534  *
 535  * \param a vector1
 536  * \param b vector2
 537  * \result a[0]*b[0]+a[1]*b[1]+a[2]*b[2], returned as scalar. Last element is ignored.
 538  */
 539 static inline double gmx_simdcall dotProduct(Simd4Double a, Simd4Double b)
 540 {
 541     return (a.simdInternal_[0] * b.simdInternal_[0] + a.simdInternal_[1] * b.simdInternal_[1]
 542             + a.simdInternal_[2] * b.simdInternal_[2]);
 543 }
 544
 545 /*! \brief SIMD4 double transpose
 546  *
 547  * \param[in,out] v0  Row 0 on input, column 0 on output
 548  * \param[in,out] v1  Row 1 on input, column 1 on output
 549  * \param[in,out] v2  Row 2 on input, column 2 on output
 550  * \param[in,out] v3  Row 3 on input, column 3 on output
 551  */
 552 static inline void gmx_simdcall transpose(Simd4Double* v0, Simd4Double* v1, Simd4Double* v2, Simd4Double* v3)
 553 {
 554     Simd4Double t0       = *v0;
 555     Simd4Double t1       = *v1;
 556     Simd4Double t2       = *v2;
 557     Simd4Double t3       = *v3;
 558     v0->simdInternal_[0] = t0.simdInternal_[0];
 559     v0->simdInternal_[1] = t1.simdInternal_[0];
 560     v0->simdInternal_[2] = t2.simdInternal_[0];
 561     v0->simdInternal_[3] = t3.simdInternal_[0];
 562     v1->simdInternal_[0] = t0.simdInternal_[1];
 563     v1->simdInternal_[1] = t1.simdInternal_[1];
 564     v1->simdInternal_[2] = t2.simdInternal_[1];
 565     v1->simdInternal_[3] = t3.simdInternal_[1];
 566     v2->simdInternal_[0] = t0.simdInternal_[2];
 567     v2->simdInternal_[1] = t1.simdInternal_[2];
 568     v2->simdInternal_[2] = t2.simdInternal_[2];
 569     v2->simdInternal_[3] = t3.simdInternal_[2];
 570     v3->simdInternal_[0] = t0.simdInternal_[3];
 571     v3->simdInternal_[1] = t1.simdInternal_[3];
 572     v3->simdInternal_[2] = t2.simdInternal_[3];
 573     v3->simdInternal_[3] = t3.simdInternal_[3];
 574 }
 575
 576 /*! \brief a==b for SIMD4 double
 577  *
 578  * \param a value1
 579  * \param b value2
 580  * \return Each element of the boolean will be set to true if a==b.
 581  */
 582 static inline Simd4DBool gmx_simdcall operator==(Simd4Double a, Simd4Double b)
 583 {
 584     Simd4DBool res;
 585
 586     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 587     {
 588         res.simdInternal_[i] = (a.simdInternal_[i] == b.simdInternal_[i]);
 589     }
 590     return res;
 591 }
 592
 593 /*! \brief a!=b for SIMD4 double
 594  *
 595  * \param a value1
 596  * \param b value2
 597  * \return Each element of the boolean will be set to true if a!=b.
 598  */
 599 static inline Simd4DBool gmx_simdcall operator!=(Simd4Double a, Simd4Double b)
 600 {
 601     Simd4DBool res;
 602
 603     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 604     {
 605         res.simdInternal_[i] = (a.simdInternal_[i] != b.simdInternal_[i]);
 606     }
 607     return res;
 608 }
 609
 610 /*! \brief a<b for SIMD4 double
 611  *
 612  * \param a value1
 613  * \param b value2
 614  * \return Each element of the boolean will be set to true if a<b.
 615  */
 616 static inline Simd4DBool gmx_simdcall operator<(Simd4Double a, Simd4Double b)
 617 {
 618     Simd4DBool res;
 619
 620     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 621     {
 622         res.simdInternal_[i] = (a.simdInternal_[i] < b.simdInternal_[i]);
 623     }
 624     return res;
 625 }
 626
 627
 628 /*! \brief a<=b for SIMD4 double.
 629  *
 630  * \param a value1
 631  * \param b value2
 632  * \return Each element of the boolean will be set to true if a<=b.
 633  */
 634 static inline Simd4DBool gmx_simdcall operator<=(Simd4Double a, Simd4Double b)
 635 {
 636     Simd4DBool res;
 637
 638     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 639     {
 640         res.simdInternal_[i] = (a.simdInternal_[i] <= b.simdInternal_[i]);
 641     }
 642     return res;
 643 }
 644
 645 /*! \brief Logical \a and on single precision SIMD4 booleans.
 646  *
 647  * \param a logical vars 1
 648  * \param b logical vars 2
 649  * \return For each element, the result boolean is true if a \& b are true.
 650  *
 651  * \note This is not necessarily a bitwise operation - the storage format
 652  * of booleans is implementation-dependent.
 653  */
 654 static inline Simd4DBool gmx_simdcall operator&&(Simd4DBool a, Simd4DBool b)
 655 {
 656     Simd4DBool res;
 657
 658     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 659     {
 660         res.simdInternal_[i] = (a.simdInternal_[i] && b.simdInternal_[i]);
 661     }
 662     return res;
 663 }
 664
 665 /*! \brief Logical \a or on single precision SIMD4 booleans.
 666  *
 667  * \param a logical vars 1
 668  * \param b logical vars 2
 669  * \return For each element, the result boolean is true if a or b is true.
 670  *
 671  * Note that this is not necessarily a bitwise operation - the storage format
 672  * of booleans is implementation-dependent.
 673  */
 674 static inline Simd4DBool gmx_simdcall operator||(Simd4DBool a, Simd4DBool b)
 675 {
 676     Simd4DBool res;
 677
 678     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 679     {
 680         res.simdInternal_[i] = (a.simdInternal_[i] || b.simdInternal_[i]);
 681     }
 682     return res;
 683 }
 684
 685 /*! \brief Returns non-zero if any of the boolean in SIMD4 a is True, otherwise 0.
 686  *
 687  * \param a Logical variable.
 688  * \return true if any element in a is true, otherwise false.
 689  *
 690  * The actual return value for truth will depend on the architecture,
 691  * so any non-zero value is considered truth.
 692  */
 693 static inline bool gmx_simdcall anyTrue(Simd4DBool a)
 694 {
 695     bool res = false;
 696
 697     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 698     {
 699         res = res || a.simdInternal_[i];
 700     }
 701     return res;
 702 }
 703
 704 /*! \brief Select from single precision SIMD4 variable where boolean is true.
 705  *
 706  * \param a Floating-point variable to select from
 707  * \param mask Boolean selector
 708  * \return  For each element, a is selected for true, 0 for false.
 709  */
 710 static inline Simd4Double gmx_simdcall selectByMask(Simd4Double a, Simd4DBool mask)
 711 {
 712     Simd4Double res;
 713
 714     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 715     {
 716         res.simdInternal_[i] = mask.simdInternal_[i] ? a.simdInternal_[i] : 0.0;
 717     }
 718     return res;
 719 }
 720
 721 /*! \brief Select from single precision SIMD4 variable where boolean is false.
 722  *
 723  * \param a Floating-point variable to select from
 724  * \param mask Boolean selector
 725  * \return  For each element, a is selected for false, 0 for true (sic).
 726  */
 727 static inline Simd4Double gmx_simdcall selectByNotMask(Simd4Double a, Simd4DBool mask)
 728 {
 729     Simd4Double res;
 730
 731     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 732     {
 733         res.simdInternal_[i] = mask.simdInternal_[i] ? 0.0 : a.simdInternal_[i];
 734     }
 735     return res;
 736 }
 737
 738
 739 /*! \brief Vector-blend SIMD4 selection.
 740  *
 741  * \param a First source
 742  * \param b Second source
 743  * \param sel Boolean selector
 744  * \return For each element, select b if sel is true, a otherwise.
 745  */
 746 static inline Simd4Double gmx_simdcall blend(Simd4Double a, Simd4Double b, Simd4DBool sel)
 747 {
 748     Simd4Double res;
 749
 750     for (std::size_t i = 0; i < res.simdInternal_.size(); i++)
 751     {
 752         res.simdInternal_[i] = sel.simdInternal_[i] ? b.simdInternal_[i] : a.simdInternal_[i];
 753     }
 754     return res;
 755 }
 756
 757
 758 /*! \brief Return sum of all elements in SIMD4 double variable.
 759  *
 760  * \param a SIMD4 variable to reduce/sum.
 761  * \return The sum of all elements in the argument variable.
 762  *
 763  */
 764 static inline double gmx_simdcall reduce(Simd4Double a)
 765 {
 766     double sum = 0.0;
 767
 768     for (std::size_t i = 0; i < a.simdInternal_.size(); i++)
 769     {
 770         sum += a.simdInternal_[i];
 771     }
 772     return sum;
 773 }
 774
 775 //! \}
 776
 777 //! \}
 778
 779 //! \endcond
 780
 781 } // namespace gmx
 782
 783 #endif // GMX_SIMD_IMPL_REFERENCE_SIMD4_DOUBLE_H