src/gromacs/simd/scalar/scalar.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_SIMD_SCALAR_H
  36 #define GMX_SIMD_SCALAR_H
  37
  38 #include <cmath>
  39 #include <cstdint>
  40 #include <cstdlib>
  41
  42 #include <algorithm>
  43
  44 /*! \libinternal \file
  45  *
  46  * \brief Scalar float functions corresponding to GROMACS SIMD functions
  47  *
  48  * These versions make it possible to write functions that are templated with
  49  * either a SIMD or scalar type. While some of these functions might not appear
  50  * SIMD-specific, we have placed them here because the only reason to use these
  51  * instead of generic function is in templated combined SIMD/non-SIMD code.
  52  *
  53  * There are a handful of limitations, in particular that it is impossible
  54  * to overload the bitwise logical operators on built-in types.
  55  *
  56  * \author Erik Lindahl <erik.lindahl@gmail.com>
  57  *
  58  * \inlibraryapi
  59  * \ingroup module_simd
  60  */
  61
  62 namespace gmx
  63 {
  64
  65 /************************************************************************
  66  *   Single-precision floating point functions mimicking SIMD versions  *
  67  ************************************************************************/
  68
  69 /*! \brief Store contents of float variable to aligned memory m.
  70  *
  71  * \param[out] m Pointer to memory.
  72  * \param a float variable to store
  73  *
  74  * \note This function might be superficially meaningless, but it helps us to
  75  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  76  *       outside such code.
  77  */
  78 static inline void
  79 store(float *m, float a)
  80 {
  81     *m = a;
  82 }
  83
  84 /*! \brief Store contents of float variable to unaligned memory m.
  85  *
  86  * \param[out] m Pointer to memory, no alignment requirement.
  87  * \param a float variable to store.
  88  *
  89  * \note This function might be superficially meaningless, but it helps us to
  90  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  91  *       outside such code.
  92  */
  93 static inline void
  94 storeU(float *m, float a)
  95 {
  96     *m = a;
  97 }
  98
  99 // We cannot overload the logical operators and, or, andNot, xor for
 100 // built-in types.
 101
 102 /*! \brief Float Fused-multiply-add. Result is a*b + c.
 103  *
 104  * \param a factor1
 105  * \param b factor2
 106  * \param c term
 107  * \return a*b + c
 108  *
 109  * \note This function might be superficially meaningless, but it helps us to
 110  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 111  *       outside such code.
 112  */
 113 static inline float
 114 fma(float a, float b, float c)
 115 {
 116     // Note that we purposely do not use the single-rounding std::fma
 117     // as that can be very slow without hardware support
 118     return a*b + c;
 119 }
 120
 121 /*! \brief Float Fused-multiply-subtract. Result is a*b - c.
 122  *
 123  * \param a factor1
 124  * \param b factor2
 125  * \param c term
 126  * \return a*b - c
 127  *
 128  * \note This function might be superficially meaningless, but it helps us to
 129  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 130  *       outside such code.
 131  */
 132 static inline float
 133 fms(float a, float b, float c)
 134 {
 135     return a*b - c;
 136 }
 137
 138 /*! \brief Float Fused-negated-multiply-add. Result is -a*b + c.
 139  *
 140  * \param a factor1
 141  * \param b factor2
 142  * \param c term
 143  * \return -a*b + c
 144  *
 145  * \note This function might be superficially meaningless, but it helps us to
 146  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 147  *       outside such code.
 148  */
 149 static inline float
 150 fnma(float a, float b, float c)
 151 {
 152     return c - a*b;
 153 }
 154
 155 /*! \brief Float Fused-negated-multiply-subtract. Result is -a*b - c.
 156  *
 157  * \param a factor1
 158  * \param b factor2
 159  * \param c term
 160  * \return -a*b - c
 161  *
 162  * \note This function might be superficially meaningless, but it helps us to
 163  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 164  *       outside such code.
 165  */
 166 static inline float
 167 fnms(float a, float b, float c)
 168 {
 169     return -a*b - c;
 170 }
 171
 172 /*! \brief Add two float variables, masked version.
 173  *
 174  * \param a term1
 175  * \param b term2
 176  * \param m mask
 177  * \return a+b where mask is true, a otherwise.
 178  *
 179  * \note This function might be superficially meaningless, but it helps us to
 180  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 181  *       outside such code.
 182  */
 183 static inline float
 184 maskAdd(float a, float b, float m)
 185 {
 186     return a + (m != 0.0F ? b : 0.0F);
 187 }
 188
 189 /*! \brief Multiply two float variables, masked version.
 190  *
 191  * \param a factor1
 192  * \param b factor2
 193  * \param m mask
 194  * \return a*b where mask is true, 0.0 otherwise.
 195  *
 196  * \note This function might be superficially meaningless, but it helps us to
 197  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 198  *       outside such code.
 199  */
 200 static inline float
 201 maskzMul(float a, float b, float m)
 202 {
 203     return m != 0.0F ? (a * b) : 0.0F;
 204 }
 205
 206 /*! \brief Float fused multiply-add, masked version.
 207  *
 208  * \param a factor1
 209  * \param b factor2
 210  * \param c term
 211  * \param m mask
 212  * \return a*b+c where mask is true, 0.0 otherwise.
 213  *
 214  * \note This function might be superficially meaningless, but it helps us to
 215  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 216  *       outside such code.
 217  */
 218 static inline float
 219 maskzFma(float a, float b, float c, float m)
 220 {
 221     return m != 0.0F ? (a * b + c) : 0.0F;
 222 }
 223
 224 /*! \brief Float Floating-point abs().
 225  *
 226  * \param a any floating point values
 227  * \return abs(a) for each element.
 228  *
 229  * \note This function might be superficially meaningless, but it helps us to
 230  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 231  *       outside such code.
 232  */
 233 static inline float
 234 abs(float a)
 235 {
 236     return std::abs(a);
 237 }
 238
 239 /*! \brief Set each float element to the largest from two variables.
 240  *
 241  * \param a Any floating-point value
 242  * \param b Any floating-point value
 243  * \return max(a,b) for each element.
 244  *
 245  * \note This function might be superficially meaningless, but it helps us to
 246  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 247  *       outside such code.
 248  */
 249 static inline float
 250 max(float a, float b)
 251 {
 252     return std::max(a, b);
 253 }
 254
 255 /*! \brief Set each float element to the smallest from two variables.
 256  *
 257  * \param a Any floating-point value
 258  * \param b Any floating-point value
 259  * \return min(a,b) for each element.
 260  *
 261  * \note This function might be superficially meaningless, but it helps us to
 262  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 263  *       outside such code.
 264  */
 265 static inline float
 266 min(float a, float b)
 267 {
 268     return std::min(a, b);
 269 }
 270
 271 /*! \brief Float round to nearest integer value (in floating-point format).
 272  *
 273  * \param a Any floating-point value
 274  * \return The nearest integer, represented in floating-point format.
 275  *
 276  * \note This function might be superficially meaningless, but it helps us to
 277  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 278  *       outside such code.
 279  */
 280 static inline float
 281 round(float a)
 282 {
 283     return std::round(a);
 284 }
 285
 286 /*! \brief Truncate float, i.e. round towards zero - common hardware instruction.
 287  *
 288  * \param a Any floating-point value
 289  * \return Integer rounded towards zero, represented in floating-point format.
 290  *
 291  * \note This function might be superficially meaningless, but it helps us to
 292  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 293  *       outside such code.
 294  */
 295 static inline float
 296 trunc(float a)
 297 {
 298     return std::trunc(a);
 299 }
 300
 301 /*! \brief Return sum of all elements in float variable (i.e., the variable itself).
 302  *
 303  * \param a variable to reduce/sum.
 304  * \return The argument variable itself.
 305  *
 306  * \note This function might be superficially meaningless, but it helps us to
 307  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 308  *       outside such code.
 309  */
 310 static inline float
 311 reduce(float a)
 312 {
 313     return a;
 314 }
 315
 316 /*! \brief Bitwise andnot for two scalar float variables.
 317  *
 318  * \param a data1
 319  * \param b data2
 320  * \return (~data1) & data2
 321  *
 322  * \note This function might be superficially meaningless, but it helps us to
 323  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 324  *       outside such code.
 325  */
 326 static inline float
 327 andNot(float a, float b)
 328 {
 329     union
 330     {
 331         float         r;
 332         std::uint32_t i;
 333     } conv1, conv2;
 334
 335     conv1.r = a;
 336     conv2.r = b;
 337
 338     conv1.i = (~conv1.i) & conv2.i;
 339
 340     return conv1.r;
 341 }
 342
 343 /*! \brief Return true if any bits are set in the float variable.
 344  *
 345  * This function is used to handle bitmasks, mainly for exclusions in the
 346  * inner kernels. Note that it will return true even for -0.0f (sign bit set),
 347  * so it is not identical to not-equal.
 348  *
 349  * \param a value
 350  * \return True if any bit in a is nonzero.
 351  *
 352  * \note This function might be superficially meaningless, but it helps us to
 353  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 354  *       outside such code.
 355  */
 356 static inline bool
 357 testBits(float a)
 358 {
 359     union
 360     {
 361         std::uint32_t i;
 362         float         f;
 363     } conv;
 364
 365     conv.f = a;
 366     return (conv.i != 0);
 367 }
 368
 369 /*! \brief Returns if the boolean is true.
 370  *
 371  * \param a Logical variable.
 372  * \return true if a is true, otherwise false.
 373  *
 374  * \note This function might be superficially meaningless, but it helps us to
 375  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 376  *       outside such code.
 377  */
 378 static inline bool
 379 anyTrue(bool a)
 380 {
 381     return a;
 382 }
 383
 384 /*! \brief Select from single precision variable where boolean is true.
 385  *
 386  * \param a Floating-point variable to select from
 387  * \param mask Boolean selector
 388  * \return  a is selected for true, 0 for false.
 389  *
 390  * \note This function might be superficially meaningless, but it helps us to
 391  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 392  *       outside such code.
 393  */
 394 static inline float
 395 selectByMask(float a, bool mask)
 396 {
 397     return mask ? a : 0.0F;
 398 }
 399
 400 /*! \brief Select from single precision variable where boolean is false.
 401  *
 402  * \param a Floating-point variable to select from
 403  * \param mask Boolean selector
 404  * \return  a is selected for false, 0 for true.
 405  *
 406  * \note This function might be superficially meaningless, but it helps us to
 407  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 408  *       outside such code.
 409  */
 410 static inline float
 411 selectByNotMask(float a, bool mask)
 412 {
 413     return mask ? 0.0F : a;
 414 }
 415
 416 /*! \brief Blend float selection.
 417  *
 418  * \param a First source
 419  * \param b Second source
 420  * \param sel Boolean selector
 421  * \return Select b if sel is true, a otherwise.
 422  *
 423  * \note This function might be superficially meaningless, but it helps us to
 424  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 425  *       outside such code.
 426  */
 427 static inline float
 428 blend(float a, float b, bool sel)
 429 {
 430     return sel ? b : a;
 431 }
 432
 433 /*! \brief Round single precision floating point to integer.
 434  *
 435  * \param a float
 436  * \return Integer format, a rounded to nearest integer.
 437  *
 438  * \note This function might be superficially meaningless, but it helps us to
 439  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 440  *       outside such code.
 441  */
 442 static inline std::int32_t
 443 cvtR2I(float a)
 444 {
 445     return static_cast<std::int32_t>(std::round(a));
 446 };
 447
 448 /*! \brief Truncate single precision floating point to integer.
 449  *
 450  * \param a float
 451  * \return Integer format, a truncated to integer.
 452  *
 453  * \note This function might be superficially meaningless, but it helps us to
 454  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 455  *       outside such code.
 456  */
 457 static inline std::int32_t
 458 cvttR2I(float a)
 459 {
 460     return static_cast<std::int32_t>(std::trunc(a));
 461 };
 462
 463 /*! \brief Return integer.
 464  *
 465  * This function mimicks the SIMD integer-to-real conversion routines. By
 466  * simply returning an integer, we let the compiler sort out whether the
 467  * conversion should be to float or double rather than using proxy objects.
 468  *
 469  * \param a integer
 470  * \return same value (a)
 471  *
 472  * \note This function might be superficially meaningless, but it helps us to
 473  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 474  *       outside such code.
 475  */
 476 static inline std::int32_t
 477 cvtI2R(std::int32_t a)
 478 {
 479     return a;
 480 }
 481
 482 /************************************************************************
 483  *   Double-precision floating point functions mimicking SIMD versions  *
 484  ************************************************************************/
 485
 486 /*! \brief Store contents of double variable to aligned memory m.
 487  *
 488  * \param[out] m Pointer to memory.
 489  * \param a double variable to store
 490  *
 491  * \note This function might be superficially meaningless, but it helps us to
 492  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 493  *       outside such code.
 494  */
 495 static inline void
 496 store(double *m, double a)
 497 {
 498     *m = a;
 499 }
 500
 501 /*! \brief Store contents of double variable to unaligned memory m.
 502  *
 503  * \param[out] m Pointer to memory, no alignment requirement.
 504  * \param a double variable to store.
 505  *
 506  * \note This function might be superficially meaningless, but it helps us to
 507  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 508  *       outside such code.
 509  */
 510 static inline void
 511 storeU(double *m, double a)
 512 {
 513     *m = a;
 514 }
 515
 516 // We cannot overload the logical operators and, or, andNot, xor for
 517 // built-in types.
 518
 519 /*! \brief double Fused-multiply-add. Result is a*b + c.
 520  *
 521  * \param a factor1
 522  * \param b factor2
 523  * \param c term
 524  * \return a*b + c
 525  *
 526  * \note This function might be superficially meaningless, but it helps us to
 527  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 528  *       outside such code.
 529  */
 530 static inline double
 531 fma(double a, double b, double c)
 532 {
 533     // Note that we purposely do not use the single-rounding std::fma
 534     // as that can be very slow without hardware support
 535     return a*b + c;
 536 }
 537
 538 /*! \brief double Fused-multiply-subtract. Result is a*b - c.
 539  *
 540  * \param a factor1
 541  * \param b factor2
 542  * \param c term
 543  * \return a*b - c
 544  *
 545  * \note This function might be superficially meaningless, but it helps us to
 546  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 547  *       outside such code.
 548  */
 549 static inline double
 550 fms(double a, double b, double c)
 551 {
 552     return a*b - c;
 553 }
 554
 555 /*! \brief double Fused-negated-multiply-add. Result is - a*b + c.
 556  *
 557  * \param a factor1
 558  * \param b factor2
 559  * \param c term
 560  * \return -a*b + c
 561  *
 562  * \note This function might be superficially meaningless, but it helps us to
 563  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 564  *       outside such code.
 565  */
 566 static inline double
 567 fnma(double a, double b, double c)
 568 {
 569     return c - a*b;
 570 }
 571
 572 /*! \brief double Fused-negated-multiply-subtract. Result is -a*b - c.
 573  *
 574  * \param a factor1
 575  * \param b factor2
 576  * \param c term
 577  * \return -a*b - c
 578  *
 579  * \note This function might be superficially meaningless, but it helps us to
 580  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 581  *       outside such code.
 582  */
 583 static inline double
 584 fnms(double a, double b, double c)
 585 {
 586     return -a*b - c;
 587 }
 588
 589 /*! \brief Add two double variables, masked version.
 590  *
 591  * \param a term1
 592  * \param b term2
 593  * \param m mask
 594  * \return a+b where mask is true, a otherwise.
 595  *
 596  * \note This function might be superficially meaningless, but it helps us to
 597  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 598  *       outside such code.
 599  */
 600 static inline double
 601 maskAdd(double a, double b, double m)
 602 {
 603     return a + (m != 0.0 ? b : 0.0);
 604 }
 605
 606 /*! \brief Multiply two double variables, masked version.
 607  *
 608  * \param a factor1
 609  * \param b factor2
 610  * \param m mask
 611  * \return a*b where mask is true, 0.0 otherwise.
 612  *
 613  * \note This function might be superficially meaningless, but it helps us to
 614  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 615  *       outside such code.
 616  */
 617 static inline double
 618 maskzMul(double a, double b, double m)
 619 {
 620     return m != 0.0 ? (a * b) : 0.0;
 621 }
 622
 623 /*! \brief double fused multiply-add, masked version.
 624  *
 625  * \param a factor1
 626  * \param b factor2
 627  * \param c term
 628  * \param m mask
 629  * \return a*b+c where mask is true, 0.0 otherwise.
 630  *
 631  * \note This function might be superficially meaningless, but it helps us to
 632  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 633  *       outside such code.
 634  */
 635 static inline double
 636 maskzFma(double a, double b, double c, double m)
 637 {
 638     return m != 0.0 ? (a * b + c) : 0.0;
 639 }
 640
 641 /*! \brief double doubleing-point abs().
 642  *
 643  * \param a any doubleing point values
 644  * \return abs(a) for each element.
 645  *
 646  * \note This function might be superficially meaningless, but it helps us to
 647  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 648  *       outside such code.
 649  */
 650 static inline double
 651 abs(double a)
 652 {
 653     return std::abs(a);
 654 }
 655
 656 /*! \brief Set each double element to the largest from two variables.
 657  *
 658  * \param a Any doubleing-point value
 659  * \param b Any doubleing-point value
 660  * \return max(a,b) for each element.
 661  *
 662  * \note This function might be superficially meaningless, but it helps us to
 663  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 664  *       outside such code.
 665  */
 666 static inline double
 667 max(double a, double b)
 668 {
 669     return std::max(a, b);
 670 }
 671
 672 /*! \brief Set each double element to the smallest from two variables.
 673  *
 674  * \param a Any doubleing-point value
 675  * \param b Any doubleing-point value
 676  * \return min(a,b) for each element.
 677  *
 678  * \note This function might be superficially meaningless, but it helps us to
 679  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 680  *       outside such code.
 681  */
 682 static inline double
 683 min(double a, double b)
 684 {
 685     return std::min(a, b);
 686 }
 687
 688 /*! \brief double round to nearest integer value (in doubleing-point format).
 689  *
 690  * \param a Any doubleing-point value
 691  * \return The nearest integer, represented in doubleing-point format.
 692  *
 693  * \note This function might be superficially meaningless, but it helps us to
 694  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 695  *       outside such code.
 696  */
 697 static inline double
 698 round(double a)
 699 {
 700     return std::round(a);
 701 }
 702
 703 /*! \brief Truncate double, i.e. round towards zero - common hardware instruction.
 704  *
 705  * \param a Any doubleing-point value
 706  * \return Integer rounded towards zero, represented in doubleing-point format.
 707  *
 708  * \note This function might be superficially meaningless, but it helps us to
 709  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 710  *       outside such code.
 711  */
 712 static inline double
 713 trunc(double a)
 714 {
 715     return std::trunc(a);
 716 }
 717
 718 /*! \brief Return sum of all elements in double variable (i.e., the variable itself).
 719  *
 720  * \param a variable to reduce/sum.
 721  * \return The argument variable itself.
 722  *
 723  * \note This function might be superficially meaningless, but it helps us to
 724  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 725  *       outside such code.
 726  */
 727 static inline double
 728 reduce(double a)
 729 {
 730     return a;
 731 }
 732
 733 /*! \brief Bitwise andnot for two scalar double variables.
 734  *
 735  * \param a data1
 736  * \param b data2
 737  * \return (~data1) & data2
 738  *
 739  * \note This function might be superficially meaningless, but it helps us to
 740  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 741  *       outside such code.
 742  */
 743 static inline double
 744 andNot(double a, double b)
 745 {
 746     union
 747     {
 748         double        r;
 749         std::uint64_t i;
 750     } conv1, conv2;
 751
 752     conv1.r = a;
 753     conv2.r = b;
 754
 755     conv1.i = (~conv1.i) & conv2.i;
 756
 757     return conv1.r;
 758 }
 759
 760 /*! \brief Return true if any bits are set in the double variable.
 761  *
 762  * This function is used to handle bitmasks, mainly for exclusions in the
 763  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
 764  * so it is not identical to not-equal.
 765  *
 766  * \param a value
 767  * \return True if any bit in a is nonzero.
 768  *
 769  * \note This function might be superficially meaningless, but it helps us to
 770  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 771  *       outside such code.
 772  */
 773 static inline bool
 774 testBits(double a)
 775 {
 776     union
 777     {
 778         std::uint64_t  i;
 779         double         f;
 780     } conv;
 781
 782     conv.f = a;
 783     return (conv.i != 0);
 784 }
 785
 786 /*! \brief Select from double precision variable where boolean is true.
 787  *
 788  * \param a double variable to select from
 789  * \param mask Boolean selector
 790  * \return  a is selected for true, 0 for false.
 791  *
 792  * \note This function might be superficially meaningless, but it helps us to
 793  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 794  *       outside such code.
 795  */
 796 static inline double
 797 selectByMask(double a, bool mask)
 798 {
 799     return mask ? a : 0.0;
 800 }
 801
 802 /*! \brief Select from double precision variable where boolean is false.
 803  *
 804  * \param a double variable to select from
 805  * \param mask Boolean selector
 806  * \return  a is selected for false, 0 for true.
 807  *
 808  * \note This function might be superficially meaningless, but it helps us to
 809  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 810  *       outside such code.
 811  */
 812 static inline double
 813 selectByNotMask(double a, bool mask)
 814 {
 815     return mask ? 0.0 : a;
 816 }
 817
 818 /*! \brief Blend double selection.
 819  *
 820  * \param a First source
 821  * \param b Second source
 822  * \param sel Boolean selector
 823  * \return Select b if sel is true, a otherwise.
 824  *
 825  * \note This function might be superficially meaningless, but it helps us to
 826  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 827  *       outside such code.
 828  */
 829 static inline double
 830 blend(double a, double b, bool sel)
 831 {
 832     return sel ? b : a;
 833 }
 834
 835 /*! \brief Round single precision doubleing point to integer.
 836  *
 837  * \param a double
 838  * \return Integer format, a rounded to nearest integer.
 839  *
 840  * \note This function might be superficially meaningless, but it helps us to
 841  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 842  *       outside such code.
 843  */
 844 static inline std::int32_t
 845 cvtR2I(double a)
 846 {
 847     return static_cast<std::int32_t>(std::round(a));
 848 };
 849
 850 /*! \brief Truncate single precision doubleing point to integer.
 851  *
 852  * \param a double
 853  * \return Integer format, a truncated to integer.
 854  *
 855  * \note This function might be superficially meaningless, but it helps us to
 856  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 857  *       outside such code.
 858  */
 859 static inline std::int32_t
 860 cvttR2I(double a)
 861 {
 862     return static_cast<std::int32_t>(std::trunc(a));
 863 };
 864
 865 // We do not have a separate cvtI2R for double, since that would require
 866 // proxy objects. Instead, the float version returns an integer and lets the
 867 // compiler sort out the conversion type.
 868
 869
 870 /*! \brief Convert float to double (mimicks SIMD conversion)
 871  *
 872  * \param a float
 873  * \return a, as double double
 874  *
 875  * \note This function might be superficially meaningless, but it helps us to
 876  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 877  *       outside such code.
 878  */
 879 static inline double
 880 cvtF2D(float a)
 881 {
 882     return a;
 883 }
 884
 885 /*! \brief Convert double to float (mimicks SIMD conversion)
 886  *
 887  * \param a double
 888  * \return a, as float
 889  *
 890  * \note This function might be superficially meaningless, but it helps us to
 891  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 892  *       outside such code.
 893  */
 894 static inline float
 895 cvtD2F(double a)
 896 {
 897     return a;
 898 }
 899
 900 /************************************************
 901  *   Integer functions mimicking SIMD versions  *
 902  ************************************************/
 903
 904 /*! \brief Store contents of integer variable to aligned memory m.
 905  *
 906  * \param[out] m Pointer to memory.
 907  * \param a integer variable to store
 908  *
 909  * \note This function might be superficially meaningless, but it helps us to
 910  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 911  *       outside such code.
 912  */
 913 static inline void
 914 store(std::int32_t *m, std::int32_t a)
 915 {
 916     *m = a;
 917 }
 918
 919 /*! \brief Store contents of integer variable to unaligned memory m.
 920  *
 921  * \param[out] m Pointer to memory, no alignment requirement.
 922  * \param a integer variable to store.
 923  *
 924  * \note This function might be superficially meaningless, but it helps us to
 925  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 926  *       outside such code.
 927  */
 928 static inline void
 929 storeU(std::int32_t *m, std::int32_t a)
 930 {
 931     *m = a;
 932 }
 933
 934 /*! \brief Bitwise andnot for two scalar integer variables.
 935  *
 936  * \param a data1
 937  * \param b data2
 938  * \return (~data1) & data2
 939  *
 940  * \note This function might be superficially meaningless, but it helps us to
 941  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 942  *       outside such code.
 943  */
 944 static inline std::int32_t
 945 andNot(std::int32_t a, std::int32_t b)
 946 {
 947     return ~a & b;
 948 }
 949
 950 /*! \brief Return true if any bits are set in the integer variable.
 951  *
 952  * This function is used to handle bitmasks, mainly for exclusions in the
 953  * inner kernels.
 954  *
 955  * \param a value
 956  * \return True if any bit in a is nonzero.
 957  *
 958  * \note This function might be superficially meaningless, but it helps us to
 959  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 960  *       outside such code.
 961  */
 962 static inline bool
 963 testBits(std::int32_t a)
 964 {
 965     return (a != 0);
 966 }
 967
 968 /*! \brief Select from integer variable where boolean is true.
 969  *
 970  * \param a Integer variable to select from
 971  * \param mask Boolean selector
 972  * \return  a is selected for true, 0 for false.
 973  *
 974  * \note This function might be superficially meaningless, but it helps us to
 975  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 976  *       outside such code.
 977  */
 978 static inline std::int32_t
 979 selectByMask(std::int32_t a, bool mask)
 980 {
 981     return mask ? a : 0;
 982 }
 983
 984 /*! \brief Select from integer variable where boolean is false.
 985  *
 986  * \param a Integer variable to select from
 987  * \param mask Boolean selector
 988  * \return  a is selected for false, 0 for true.
 989  *
 990  * \note This function might be superficially meaningless, but it helps us to
 991  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 992  *       outside such code.
 993  */
 994 static inline std::int32_t
 995 selectByNotMask(std::int32_t a, bool mask)
 996 {
 997     return mask ? 0 : a;
 998 }
 999
1000 /*! \brief Blend integer selection.
1001  *
1002  * \param a First source
1003  * \param b Second source
1004  * \param sel Boolean selector
1005  * \return Select b if sel is true, a otherwise.
1006  *
1007  * \note This function might be superficially meaningless, but it helps us to
1008  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1009  *       outside such code.
1010  */
1011 static inline std::int32_t
1012 blend(std::int32_t a, std::int32_t b, bool sel)
1013 {
1014     return sel ? b : a;
1015 }
1016
1017 /*! \brief Just return a boolean (mimicks SIMD real-to-int bool conversions)
1018  *
1019  * \param a  boolean
1020  * \return same boolean
1021  *
1022  * \note This function might be superficially meaningless, but it helps us to
1023  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1024  *       outside such code.
1025  */
1026 static inline bool
1027 cvtB2IB(bool a)
1028 {
1029     return a;
1030 }
1031
1032 /*! \brief Just return a boolean (mimicks SIMD int-to-real bool conversions)
1033  *
1034  * \param a  boolean
1035  * \return same boolean
1036  *
1037  * \note This function might be superficially meaningless, but it helps us to
1038  *       write templated SIMD/non-SIMD code. For clarity it should not be used
1039  *       outside such code.
1040  */
1041 static inline bool
1042 cvtIB2B(bool a)
1043 {
1044     return a;
1045 }
1046
1047 } // namespace gmx
1048
1049
1050 #endif // GMX_SIMD_SCALAR_FLOAT_H