src/gromacs/simd/scalar/scalar.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_SIMD_SCALAR_H
  36 #define GMX_SIMD_SCALAR_H
  37
  38 #include <cmath>
  39 #include <cstdint>
  40 #include <cstdlib>
  41
  42 #include <algorithm>
  43
  44 /*! \libinternal \file
  45  *
  46  * \brief Scalar float functions corresponding to GROMACS SIMD functions
  47  *
  48  * These versions make it possible to write functions that are templated with
  49  * either a SIMD or scalar type. While some of these functions might not appear
  50  * SIMD-specific, we have placed them here because the only reason to use these
  51  * instead of generic function is in templated combined SIMD/non-SIMD code.
  52  *
  53  * There are a handful of limitations, in particular that it is impossible
  54  * to overload the bitwise logical operators on built-in types.
  55  *
  56  * \author Erik Lindahl <erik.lindahl@gmail.com>
  57  *
  58  * \inlibraryapi
  59  * \ingroup module_simd
  60  */
  61
  62 namespace gmx
  63 {
  64
  65 /************************************************************************
  66  *   Single-precision floating point functions mimicking SIMD versions  *
  67  ************************************************************************/
  68
  69 /*! \brief Store contents of float variable to aligned memory m.
  70  *
  71  * \param[out] m Pointer to memory.
  72  * \param a float variable to store
  73  *
  74  * \note This function might be superficially meaningless, but it helps us to
  75  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  76  *       outside such code.
  77  */
  78 static inline void store(float* m, float a)
  79 {
  80     *m = a;
  81 }
  82
  83 /*! \brief Store contents of float variable to unaligned memory m.
  84  *
  85  * \param[out] m Pointer to memory, no alignment requirement.
  86  * \param a float variable to store.
  87  *
  88  * \note This function might be superficially meaningless, but it helps us to
  89  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  90  *       outside such code.
  91  */
  92 static inline void storeU(float* m, float a)
  93 {
  94     *m = a;
  95 }
  96
  97 // We cannot overload the logical operators and, or, andNot, xor for
  98 // built-in types.
  99
 100 /*! \brief Float Fused-multiply-add. Result is a*b + c.
 101  *
 102  * \param a factor1
 103  * \param b factor2
 104  * \param c term
 105  * \return a*b + c
 106  *
 107  * \note This function might be superficially meaningless, but it helps us to
 108  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 109  *       outside such code.
 110  */
 111 static inline float fma(float a, float b, float c)
 112 {
 113     // Note that we purposely do not use the single-rounding std::fma
 114     // as that can be very slow without hardware support
 115     return a * b + c;
 116 }
 117
 118 /*! \brief Float Fused-multiply-subtract. Result is a*b - c.
 119  *
 120  * \param a factor1
 121  * \param b factor2
 122  * \param c term
 123  * \return a*b - c
 124  *
 125  * \note This function might be superficially meaningless, but it helps us to
 126  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 127  *       outside such code.
 128  */
 129 static inline float fms(float a, float b, float c)
 130 {
 131     return a * b - c;
 132 }
 133
 134 /*! \brief Float Fused-negated-multiply-add. Result is -a*b + c.
 135  *
 136  * \param a factor1
 137  * \param b factor2
 138  * \param c term
 139  * \return -a*b + c
 140  *
 141  * \note This function might be superficially meaningless, but it helps us to
 142  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 143  *       outside such code.
 144  */
 145 static inline float fnma(float a, float b, float c)
 146 {
 147     return c - a * b;
 148 }
 149
 150 /*! \brief Float Fused-negated-multiply-subtract. Result is -a*b - c.
 151  *
 152  * \param a factor1
 153  * \param b factor2
 154  * \param c term
 155  * \return -a*b - c
 156  *
 157  * \note This function might be superficially meaningless, but it helps us to
 158  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 159  *       outside such code.
 160  */
 161 static inline float fnms(float a, float b, float c)
 162 {
 163     return -a * b - c;
 164 }
 165
 166 /*! \brief Add two float variables, masked version.
 167  *
 168  * \param a term1
 169  * \param b term2
 170  * \param m mask
 171  * \return a+b where mask is true, a otherwise.
 172  *
 173  * \note This function might be superficially meaningless, but it helps us to
 174  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 175  *       outside such code.
 176  */
 177 static inline float maskAdd(float a, float b, float m)
 178 {
 179     return a + (m != 0.0F ? b : 0.0F);
 180 }
 181
 182 /*! \brief Multiply two float variables, masked version.
 183  *
 184  * \param a factor1
 185  * \param b factor2
 186  * \param m mask
 187  * \return a*b where mask is true, 0.0 otherwise.
 188  *
 189  * \note This function might be superficially meaningless, but it helps us to
 190  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 191  *       outside such code.
 192  */
 193 static inline float maskzMul(float a, float b, float m)
 194 {
 195     return m != 0.0F ? (a * b) : 0.0F;
 196 }
 197
 198 /*! \brief Float fused multiply-add, masked version.
 199  *
 200  * \param a factor1
 201  * \param b factor2
 202  * \param c term
 203  * \param m mask
 204  * \return a*b+c where mask is true, 0.0 otherwise.
 205  *
 206  * \note This function might be superficially meaningless, but it helps us to
 207  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 208  *       outside such code.
 209  */
 210 static inline float maskzFma(float a, float b, float c, float m)
 211 {
 212     return m != 0.0F ? (a * b + c) : 0.0F;
 213 }
 214
 215 /*! \brief Float Floating-point abs().
 216  *
 217  * \param a any floating point values
 218  * \return abs(a) for each element.
 219  *
 220  * \note This function might be superficially meaningless, but it helps us to
 221  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 222  *       outside such code.
 223  */
 224 static inline float abs(float a)
 225 {
 226     return std::abs(a);
 227 }
 228
 229 /*! \brief Set each float element to the largest from two variables.
 230  *
 231  * \param a Any floating-point value
 232  * \param b Any floating-point value
 233  * \return max(a,b) for each element.
 234  *
 235  * \note This function might be superficially meaningless, but it helps us to
 236  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 237  *       outside such code.
 238  */
 239 static inline float max(float a, float b)
 240 {
 241     return std::max(a, b);
 242 }
 243
 244 /*! \brief Set each float element to the smallest from two variables.
 245  *
 246  * \param a Any floating-point value
 247  * \param b Any floating-point value
 248  * \return min(a,b) for each element.
 249  *
 250  * \note This function might be superficially meaningless, but it helps us to
 251  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 252  *       outside such code.
 253  */
 254 static inline float min(float a, float b)
 255 {
 256     return std::min(a, b);
 257 }
 258
 259 /*! \brief Float round to nearest integer value (in floating-point format).
 260  *
 261  * \param a Any floating-point value
 262  * \return The nearest integer, represented in floating-point format.
 263  *
 264  * \note This function might be superficially meaningless, but it helps us to
 265  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 266  *       outside such code.
 267  */
 268 static inline float round(float a)
 269 {
 270     return std::round(a);
 271 }
 272
 273 /*! \brief Truncate float, i.e. round towards zero - common hardware instruction.
 274  *
 275  * \param a Any floating-point value
 276  * \return Integer rounded towards zero, represented in floating-point format.
 277  *
 278  * \note This function might be superficially meaningless, but it helps us to
 279  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 280  *       outside such code.
 281  */
 282 static inline float trunc(float a)
 283 {
 284     return std::trunc(a);
 285 }
 286
 287 /*! \brief Return sum of all elements in float variable (i.e., the variable itself).
 288  *
 289  * \param a variable to reduce/sum.
 290  * \return The argument variable itself.
 291  *
 292  * \note This function might be superficially meaningless, but it helps us to
 293  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 294  *       outside such code.
 295  */
 296 static inline float reduce(float a)
 297 {
 298     return a;
 299 }
 300
 301 /*! \brief Bitwise andnot for two scalar float variables.
 302  *
 303  * \param a data1
 304  * \param b data2
 305  * \return (~data1) & data2
 306  *
 307  * \note This function might be superficially meaningless, but it helps us to
 308  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 309  *       outside such code.
 310  */
 311 static inline float andNot(float a, float b)
 312 {
 313     union {
 314         float         r;
 315         std::uint32_t i;
 316     } conv1, conv2;
 317
 318     conv1.r = a;
 319     conv2.r = b;
 320
 321     conv1.i = (~conv1.i) & conv2.i;
 322
 323     return conv1.r;
 324 }
 325
 326 /*! \brief Return true if any bits are set in the float variable.
 327  *
 328  * This function is used to handle bitmasks, mainly for exclusions in the
 329  * inner kernels. Note that it will return true even for -0.0f (sign bit set),
 330  * so it is not identical to not-equal.
 331  *
 332  * \param a value
 333  * \return True if any bit in a is nonzero.
 334  *
 335  * \note This function might be superficially meaningless, but it helps us to
 336  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 337  *       outside such code.
 338  */
 339 static inline bool testBits(float a)
 340 {
 341     union {
 342         std::uint32_t i;
 343         float         f;
 344     } conv;
 345
 346     conv.f = a;
 347     return (conv.i != 0);
 348 }
 349
 350 /*! \brief Returns if the boolean is true.
 351  *
 352  * \param a Logical variable.
 353  * \return true if a is true, otherwise false.
 354  *
 355  * \note This function might be superficially meaningless, but it helps us to
 356  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 357  *       outside such code.
 358  */
 359 static inline bool anyTrue(bool a)
 360 {
 361     return a;
 362 }
 363
 364 /*! \brief Select from single precision variable where boolean is true.
 365  *
 366  * \param a Floating-point variable to select from
 367  * \param mask Boolean selector
 368  * \return  a is selected for true, 0 for false.
 369  *
 370  * \note This function might be superficially meaningless, but it helps us to
 371  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 372  *       outside such code.
 373  */
 374 static inline float selectByMask(float a, bool mask)
 375 {
 376     return mask ? a : 0.0F;
 377 }
 378
 379 /*! \brief Select from single precision variable where boolean is false.
 380  *
 381  * \param a Floating-point variable to select from
 382  * \param mask Boolean selector
 383  * \return  a is selected for false, 0 for true.
 384  *
 385  * \note This function might be superficially meaningless, but it helps us to
 386  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 387  *       outside such code.
 388  */
 389 static inline float selectByNotMask(float a, bool mask)
 390 {
 391     return mask ? 0.0F : a;
 392 }
 393
 394 /*! \brief Blend float selection.
 395  *
 396  * \param a First source
 397  * \param b Second source
 398  * \param sel Boolean selector
 399  * \return Select b if sel is true, a otherwise.
 400  *
 401  * \note This function might be superficially meaningless, but it helps us to
 402  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 403  *       outside such code.
 404  */
 405 static inline float blend(float a, float b, bool sel)
 406 {
 407     return sel ? b : a;
 408 }
 409
 410 /*! \brief Round single precision floating point to integer.
 411  *
 412  * \param a float
 413  * \return Integer format, a rounded to nearest integer.
 414  *
 415  * \note This function might be superficially meaningless, but it helps us to
 416  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 417  *       outside such code.
 418  */
 419 static inline std::int32_t cvtR2I(float a)
 420 {
 421     return static_cast<std::int32_t>(std::round(a));
 422 };
 423
 424 /*! \brief Truncate single precision floating point to integer.
 425  *
 426  * \param a float
 427  * \return Integer format, a truncated to integer.
 428  *
 429  * \note This function might be superficially meaningless, but it helps us to
 430  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 431  *       outside such code.
 432  */
 433 static inline std::int32_t cvttR2I(float a)
 434 {
 435     return static_cast<std::int32_t>(std::trunc(a));
 436 };
 437
 438 /*! \brief Return integer.
 439  *
 440  * This function mimicks the SIMD integer-to-real conversion routines. By
 441  * simply returning an integer, we let the compiler sort out whether the
 442  * conversion should be to float or double rather than using proxy objects.
 443  *
 444  * \param a integer
 445  * \return same value (a)
 446  *
 447  * \note This function might be superficially meaningless, but it helps us to
 448  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 449  *       outside such code.
 450  */
 451 static inline std::int32_t cvtI2R(std::int32_t a)
 452 {
 453     return a;
 454 }
 455
 456 /************************************************************************
 457  *   Double-precision floating point functions mimicking SIMD versions  *
 458  ************************************************************************/
 459
 460 /*! \brief Store contents of double variable to aligned memory m.
 461  *
 462  * \param[out] m Pointer to memory.
 463  * \param a double variable to store
 464  *
 465  * \note This function might be superficially meaningless, but it helps us to
 466  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 467  *       outside such code.
 468  */
 469 static inline void store(double* m, double a)
 470 {
 471     *m = a;
 472 }
 473
 474 /*! \brief Store contents of double variable to unaligned memory m.
 475  *
 476  * \param[out] m Pointer to memory, no alignment requirement.
 477  * \param a double variable to store.
 478  *
 479  * \note This function might be superficially meaningless, but it helps us to
 480  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 481  *       outside such code.
 482  */
 483 static inline void storeU(double* m, double a)
 484 {
 485     *m = a;
 486 }
 487
 488 // We cannot overload the logical operators and, or, andNot, xor for
 489 // built-in types.
 490
 491 /*! \brief double Fused-multiply-add. Result is a*b + c.
 492  *
 493  * \param a factor1
 494  * \param b factor2
 495  * \param c term
 496  * \return a*b + c
 497  *
 498  * \note This function might be superficially meaningless, but it helps us to
 499  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 500  *       outside such code.
 501  */
 502 static inline double fma(double a, double b, double c)
 503 {
 504     // Note that we purposely do not use the single-rounding std::fma
 505     // as that can be very slow without hardware support
 506     return a * b + c;
 507 }
 508
 509 /*! \brief double Fused-multiply-subtract. Result is a*b - c.
 510  *
 511  * \param a factor1
 512  * \param b factor2
 513  * \param c term
 514  * \return a*b - c
 515  *
 516  * \note This function might be superficially meaningless, but it helps us to
 517  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 518  *       outside such code.
 519  */
 520 static inline double fms(double a, double b, double c)
 521 {
 522     return a * b - c;
 523 }
 524
 525 /*! \brief double Fused-negated-multiply-add. Result is - a*b + c.
 526  *
 527  * \param a factor1
 528  * \param b factor2
 529  * \param c term
 530  * \return -a*b + c
 531  *
 532  * \note This function might be superficially meaningless, but it helps us to
 533  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 534  *       outside such code.
 535  */
 536 static inline double fnma(double a, double b, double c)
 537 {
 538     return c - a * b;
 539 }
 540
 541 /*! \brief double Fused-negated-multiply-subtract. Result is -a*b - c.
 542  *
 543  * \param a factor1
 544  * \param b factor2
 545  * \param c term
 546  * \return -a*b - c
 547  *
 548  * \note This function might be superficially meaningless, but it helps us to
 549  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 550  *       outside such code.
 551  */
 552 static inline double fnms(double a, double b, double c)
 553 {
 554     return -a * b - c;
 555 }
 556
 557 /*! \brief Add two double variables, masked version.
 558  *
 559  * \param a term1
 560  * \param b term2
 561  * \param m mask
 562  * \return a+b where mask is true, a otherwise.
 563  *
 564  * \note This function might be superficially meaningless, but it helps us to
 565  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 566  *       outside such code.
 567  */
 568 static inline double maskAdd(double a, double b, double m)
 569 {
 570     return a + (m != 0.0 ? b : 0.0);
 571 }
 572
 573 /*! \brief Multiply two double variables, masked version.
 574  *
 575  * \param a factor1
 576  * \param b factor2
 577  * \param m mask
 578  * \return a*b where mask is true, 0.0 otherwise.
 579  *
 580  * \note This function might be superficially meaningless, but it helps us to
 581  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 582  *       outside such code.
 583  */
 584 static inline double maskzMul(double a, double b, double m)
 585 {
 586     return m != 0.0 ? (a * b) : 0.0;
 587 }
 588
 589 /*! \brief double fused multiply-add, masked version.
 590  *
 591  * \param a factor1
 592  * \param b factor2
 593  * \param c term
 594  * \param m mask
 595  * \return a*b+c where mask is true, 0.0 otherwise.
 596  *
 597  * \note This function might be superficially meaningless, but it helps us to
 598  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 599  *       outside such code.
 600  */
 601 static inline double maskzFma(double a, double b, double c, double m)
 602 {
 603     return m != 0.0 ? (a * b + c) : 0.0;
 604 }
 605
 606 /*! \brief double doubleing-point abs().
 607  *
 608  * \param a any doubleing point values
 609  * \return abs(a) for each element.
 610  *
 611  * \note This function might be superficially meaningless, but it helps us to
 612  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 613  *       outside such code.
 614  */
 615 static inline double abs(double a)
 616 {
 617     return std::abs(a);
 618 }
 619
 620 /*! \brief Set each double element to the largest from two variables.
 621  *
 622  * \param a Any doubleing-point value
 623  * \param b Any doubleing-point value
 624  * \return max(a,b) for each element.
 625  *
 626  * \note This function might be superficially meaningless, but it helps us to
 627  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 628  *       outside such code.
 629  */
 630 static inline double max(double a, double b)
 631 {
 632     return std::max(a, b);
 633 }
 634
 635 /*! \brief Set each double element to the smallest from two variables.
 636  *
 637  * \param a Any doubleing-point value
 638  * \param b Any doubleing-point value
 639  * \return min(a,b) for each element.
 640  *
 641  * \note This function might be superficially meaningless, but it helps us to
 642  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 643  *       outside such code.
 644  */
 645 static inline double min(double a, double b)
 646 {
 647     return std::min(a, b);
 648 }
 649
 650 /*! \brief double round to nearest integer value (in doubleing-point format).
 651  *
 652  * \param a Any doubleing-point value
 653  * \return The nearest integer, represented in doubleing-point format.
 654  *
 655  * \note This function might be superficially meaningless, but it helps us to
 656  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 657  *       outside such code.
 658  */
 659 static inline double round(double a)
 660 {
 661     return std::round(a);
 662 }
 663
 664 /*! \brief Truncate double, i.e. round towards zero - common hardware instruction.
 665  *
 666  * \param a Any doubleing-point value
 667  * \return Integer rounded towards zero, represented in doubleing-point format.
 668  *
 669  * \note This function might be superficially meaningless, but it helps us to
 670  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 671  *       outside such code.
 672  */
 673 static inline double trunc(double a)
 674 {
 675     return std::trunc(a);
 676 }
 677
 678 /*! \brief Return sum of all elements in double variable (i.e., the variable itself).
 679  *
 680  * \param a variable to reduce/sum.
 681  * \return The argument variable itself.
 682  *
 683  * \note This function might be superficially meaningless, but it helps us to
 684  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 685  *       outside such code.
 686  */
 687 static inline double reduce(double a)
 688 {
 689     return a;
 690 }
 691
 692 /*! \brief Bitwise andnot for two scalar double variables.
 693  *
 694  * \param a data1
 695  * \param b data2
 696  * \return (~data1) & data2
 697  *
 698  * \note This function might be superficially meaningless, but it helps us to
 699  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 700  *       outside such code.
 701  */
 702 static inline double andNot(double a, double b)
 703 {
 704     union {
 705         double        r;
 706         std::uint64_t i;
 707     } conv1, conv2;
 708
 709     conv1.r = a;
 710     conv2.r = b;
 711
 712     conv1.i = (~conv1.i) & conv2.i;
 713
 714     return conv1.r;
 715 }
 716
 717 /*! \brief Return true if any bits are set in the double variable.
 718  *
 719  * This function is used to handle bitmasks, mainly for exclusions in the
 720  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
 721  * so it is not identical to not-equal.
 722  *
 723  * \param a value
 724  * \return True if any bit in a is nonzero.
 725  *
 726  * \note This function might be superficially meaningless, but it helps us to
 727  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 728  *       outside such code.
 729  */
 730 static inline bool testBits(double a)
 731 {
 732     union {
 733         std::uint64_t i;
 734         double        f;
 735     } conv;
 736
 737     conv.f = a;
 738     return (conv.i != 0);
 739 }
 740
 741 /*! \brief Select from double precision variable where boolean is true.
 742  *
 743  * \param a double variable to select from
 744  * \param mask Boolean selector
 745  * \return  a is selected for true, 0 for false.
 746  *
 747  * \note This function might be superficially meaningless, but it helps us to
 748  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 749  *       outside such code.
 750  */
 751 static inline double selectByMask(double a, bool mask)
 752 {
 753     return mask ? a : 0.0;
 754 }
 755
 756 /*! \brief Select from double precision variable where boolean is false.
 757  *
 758  * \param a double variable to select from
 759  * \param mask Boolean selector
 760  * \return  a is selected for false, 0 for true.
 761  *
 762  * \note This function might be superficially meaningless, but it helps us to
 763  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 764  *       outside such code.
 765  */
 766 static inline double selectByNotMask(double a, bool mask)
 767 {
 768     return mask ? 0.0 : a;
 769 }
 770
 771 /*! \brief Blend double selection.
 772  *
 773  * \param a First source
 774  * \param b Second source
 775  * \param sel Boolean selector
 776  * \return Select b if sel is true, a otherwise.
 777  *
 778  * \note This function might be superficially meaningless, but it helps us to
 779  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 780  *       outside such code.
 781  */
 782 static inline double blend(double a, double b, bool sel)
 783 {
 784     return sel ? b : a;
 785 }
 786
 787 /*! \brief Round single precision doubleing point to integer.
 788  *
 789  * \param a double
 790  * \return Integer format, a rounded to nearest integer.
 791  *
 792  * \note This function might be superficially meaningless, but it helps us to
 793  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 794  *       outside such code.
 795  */
 796 static inline std::int32_t cvtR2I(double a)
 797 {
 798     return static_cast<std::int32_t>(std::round(a));
 799 };
 800
 801 /*! \brief Truncate single precision doubleing point to integer.
 802  *
 803  * \param a double
 804  * \return Integer format, a truncated to integer.
 805  *
 806  * \note This function might be superficially meaningless, but it helps us to
 807  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 808  *       outside such code.
 809  */
 810 static inline std::int32_t cvttR2I(double a)
 811 {
 812     return static_cast<std::int32_t>(std::trunc(a));
 813 };
 814
 815 // We do not have a separate cvtI2R for double, since that would require
 816 // proxy objects. Instead, the float version returns an integer and lets the
 817 // compiler sort out the conversion type.
 818
 819
 820 /*! \brief Convert float to double (mimicks SIMD conversion)
 821  *
 822  * \param a float
 823  * \return a, as double double
 824  *
 825  * \note This function might be superficially meaningless, but it helps us to
 826  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 827  *       outside such code.
 828  */
 829 static inline double cvtF2D(float a)
 830 {
 831     return a;
 832 }
 833
 834 /*! \brief Convert double to float (mimicks SIMD conversion)
 835  *
 836  * \param a double
 837  * \return a, as float
 838  *
 839  * \note This function might be superficially meaningless, but it helps us to
 840  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 841  *       outside such code.
 842  */
 843 static inline float cvtD2F(double a)
 844 {
 845     return a;
 846 }
 847
 848 /************************************************
 849  *   Integer functions mimicking SIMD versions  *
 850  ************************************************/
 851
 852 /*! \brief Store contents of integer variable to aligned memory m.
 853  *
 854  * \param[out] m Pointer to memory.
 855  * \param a integer variable to store
 856  *
 857  * \note This function might be superficially meaningless, but it helps us to
 858  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 859  *       outside such code.
 860  */
 861 static inline void store(std::int32_t* m, std::int32_t a)
 862 {
 863     *m = a;
 864 }
 865
 866 /*! \brief Store contents of integer variable to unaligned memory m.
 867  *
 868  * \param[out] m Pointer to memory, no alignment requirement.
 869  * \param a integer variable to store.
 870  *
 871  * \note This function might be superficially meaningless, but it helps us to
 872  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 873  *       outside such code.
 874  */
 875 static inline void storeU(std::int32_t* m, std::int32_t a)
 876 {
 877     *m = a;
 878 }
 879
 880 /*! \brief Bitwise andnot for two scalar integer variables.
 881  *
 882  * \param a data1
 883  * \param b data2
 884  * \return (~data1) & data2
 885  *
 886  * \note This function might be superficially meaningless, but it helps us to
 887  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 888  *       outside such code.
 889  */
 890 static inline std::int32_t andNot(std::int32_t a, std::int32_t b)
 891 {
 892     return ~a & b;
 893 }
 894
 895 /*! \brief Return true if any bits are set in the integer variable.
 896  *
 897  * This function is used to handle bitmasks, mainly for exclusions in the
 898  * inner kernels.
 899  *
 900  * \param a value
 901  * \return True if any bit in a is nonzero.
 902  *
 903  * \note This function might be superficially meaningless, but it helps us to
 904  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 905  *       outside such code.
 906  */
 907 static inline bool testBits(std::int32_t a)
 908 {
 909     return (a != 0);
 910 }
 911
 912 /*! \brief Select from integer variable where boolean is true.
 913  *
 914  * \param a Integer variable to select from
 915  * \param mask Boolean selector
 916  * \return  a is selected for true, 0 for false.
 917  *
 918  * \note This function might be superficially meaningless, but it helps us to
 919  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 920  *       outside such code.
 921  */
 922 static inline std::int32_t selectByMask(std::int32_t a, bool mask)
 923 {
 924     return mask ? a : 0;
 925 }
 926
 927 /*! \brief Select from integer variable where boolean is false.
 928  *
 929  * \param a Integer variable to select from
 930  * \param mask Boolean selector
 931  * \return  a is selected for false, 0 for true.
 932  *
 933  * \note This function might be superficially meaningless, but it helps us to
 934  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 935  *       outside such code.
 936  */
 937 static inline std::int32_t selectByNotMask(std::int32_t a, bool mask)
 938 {
 939     return mask ? 0 : a;
 940 }
 941
 942 /*! \brief Blend integer selection.
 943  *
 944  * \param a First source
 945  * \param b Second source
 946  * \param sel Boolean selector
 947  * \return Select b if sel is true, a otherwise.
 948  *
 949  * \note This function might be superficially meaningless, but it helps us to
 950  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 951  *       outside such code.
 952  */
 953 static inline std::int32_t blend(std::int32_t a, std::int32_t b, bool sel)
 954 {
 955     return sel ? b : a;
 956 }
 957
 958 /*! \brief Just return a boolean (mimicks SIMD real-to-int bool conversions)
 959  *
 960  * \param a  boolean
 961  * \return same boolean
 962  *
 963  * \note This function might be superficially meaningless, but it helps us to
 964  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 965  *       outside such code.
 966  */
 967 static inline bool cvtB2IB(bool a)
 968 {
 969     return a;
 970 }
 971
 972 /*! \brief Just return a boolean (mimicks SIMD int-to-real bool conversions)
 973  *
 974  * \param a  boolean
 975  * \return same boolean
 976  *
 977  * \note This function might be superficially meaningless, but it helps us to
 978  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 979  *       outside such code.
 980  */
 981 static inline bool cvtIB2B(bool a)
 982 {
 983     return a;
 984 }
 985
 986 } // namespace gmx
 987
 988
 989 #endif // GMX_SIMD_SCALAR_FLOAT_H