src/gromacs/simd/scalar/scalar.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2016,2017,2018,2019,2020, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_SIMD_SCALAR_H
  36 #define GMX_SIMD_SCALAR_H
  37
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>

#include <algorithm>
  43
  44 /*! \libinternal \file
  45  *
  46  * \brief Scalar float functions corresponding to GROMACS SIMD functions
  47  *
  48  * These versions make it possible to write functions that are templated with
  49  * either a SIMD or scalar type. While some of these functions might not appear
  50  * SIMD-specific, we have placed them here because the only reason to use these
  51  * instead of generic function is in templated combined SIMD/non-SIMD code.
  52  *
  53  * There are a handful of limitations, in particular that it is impossible
  54  * to overload the bitwise logical operators on built-in types.
  55  *
  56  * \author Erik Lindahl <erik.lindahl@gmail.com>
  57  *
  58  * \inlibraryapi
  59  * \ingroup module_simd
  60  */
  61
  62 namespace gmx
  63 {
  64
  65 /************************************************************************
  66  *   Single-precision floating point functions mimicking SIMD versions  *
  67  ************************************************************************/
  68
  69 /*! \brief Store contents of float variable to aligned memory m.
  70  *
  71  * \param[out] m Pointer to memory.
  72  * \param a float variable to store
  73  *
  74  * \note This function might be superficially meaningless, but it helps us to
  75  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  76  *       outside such code.
  77  */
  78 static inline void store(float* m, float a)
  79 {
  80     *m = a;
  81 }
  82
  83 /*! \brief Store contents of float variable to unaligned memory m.
  84  *
  85  * \param[out] m Pointer to memory, no alignment requirement.
  86  * \param a float variable to store.
  87  *
  88  * \note This function might be superficially meaningless, but it helps us to
  89  *       write templated SIMD/non-SIMD code. For clarity it should not be used
  90  *       outside such code.
  91  */
  92 static inline void storeU(float* m, float a)
  93 {
  94     *m = a;
  95 }
  96
// We cannot overload the bitwise logical operators (and, or, xor) for
// built-in types, so they are not provided here; andNot is an ordinary
// function and is defined further down.
  99
 100 /*! \brief Float Fused-multiply-add. Result is a*b + c.
 101  *
 102  * \param a factor1
 103  * \param b factor2
 104  * \param c term
 105  * \return a*b + c
 106  *
 107  * \note This function might be superficially meaningless, but it helps us to
 108  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 109  *       outside such code.
 110  */
 111 static inline float fma(float a, float b, float c)
 112 {
 113     // Note that we purposely do not use the single-rounding std::fma
 114     // as that can be very slow without hardware support
 115     return a * b + c;
 116 }
 117
 118 /*! \brief Float Fused-multiply-subtract. Result is a*b - c.
 119  *
 120  * \param a factor1
 121  * \param b factor2
 122  * \param c term
 123  * \return a*b - c
 124  *
 125  * \note This function might be superficially meaningless, but it helps us to
 126  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 127  *       outside such code.
 128  */
 129 static inline float fms(float a, float b, float c)
 130 {
 131     return a * b - c;
 132 }
 133
 134 /*! \brief Float Fused-negated-multiply-add. Result is -a*b + c.
 135  *
 136  * \param a factor1
 137  * \param b factor2
 138  * \param c term
 139  * \return -a*b + c
 140  *
 141  * \note This function might be superficially meaningless, but it helps us to
 142  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 143  *       outside such code.
 144  */
 145 static inline float fnma(float a, float b, float c)
 146 {
 147     return c - a * b;
 148 }
 149
 150 /*! \brief Float Fused-negated-multiply-subtract. Result is -a*b - c.
 151  *
 152  * \param a factor1
 153  * \param b factor2
 154  * \param c term
 155  * \return -a*b - c
 156  *
 157  * \note This function might be superficially meaningless, but it helps us to
 158  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 159  *       outside such code.
 160  */
 161 static inline float fnms(float a, float b, float c)
 162 {
 163     return -a * b - c;
 164 }
 165
 166 /*! \brief Add two float variables, masked version.
 167  *
 168  * \param a term1
 169  * \param b term2
 170  * \param m mask
 171  * \return a+b where mask is true, a otherwise.
 172  *
 173  * \note This function might be superficially meaningless, but it helps us to
 174  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 175  *       outside such code.
 176  */
 177 static inline float maskAdd(float a, float b, float m)
 178 {
 179     return a + (m != 0.0F ? b : 0.0F);
 180 }
 181
 182 /*! \brief Multiply two float variables, masked version.
 183  *
 184  * \param a factor1
 185  * \param b factor2
 186  * \param m mask
 187  * \return a*b where mask is true, 0.0 otherwise.
 188  *
 189  * \note This function might be superficially meaningless, but it helps us to
 190  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 191  *       outside such code.
 192  */
 193 static inline float maskzMul(float a, float b, float m)
 194 {
 195     return m != 0.0F ? (a * b) : 0.0F;
 196 }
 197
 198 /*! \brief Float fused multiply-add, masked version.
 199  *
 200  * \param a factor1
 201  * \param b factor2
 202  * \param c term
 203  * \param m mask
 204  * \return a*b+c where mask is true, 0.0 otherwise.
 205  *
 206  * \note This function might be superficially meaningless, but it helps us to
 207  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 208  *       outside such code.
 209  */
 210 static inline float maskzFma(float a, float b, float c, float m)
 211 {
 212     return m != 0.0F ? (a * b + c) : 0.0F;
 213 }
 214
 215 /*! \brief Float 1.0/x, masked version.
 216  *
 217  * \param x Argument, x>0 for entries where mask is true.
 218  * \param m Mask
 219  * \return 1/x. The result for masked-out entries will be 0.0.
 220  *
 221  * \note This function might be superficially meaningless, but it helps us to
 222  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 223  *       outside such code.
 224  */
 225 static inline float gmx_simdcall maskzRcp(float x, float m)
 226 {
 227     return m != 0.0F ? 1.0F / x : 0.0F;
 228 }
 229
 230 /*! \brief Float Floating-point abs().
 231  *
 232  * \param a any floating point values
 233  * \return abs(a) for each element.
 234  *
 235  * \note This function might be superficially meaningless, but it helps us to
 236  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 237  *       outside such code.
 238  */
 239 static inline float abs(float a)
 240 {
 241     return std::abs(a);
 242 }
 243
 244 /*! \brief Set each float element to the largest from two variables.
 245  *
 246  * \param a Any floating-point value
 247  * \param b Any floating-point value
 248  * \return max(a,b) for each element.
 249  *
 250  * \note This function might be superficially meaningless, but it helps us to
 251  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 252  *       outside such code.
 253  */
 254 static inline float max(float a, float b)
 255 {
 256     return std::max(a, b);
 257 }
 258
 259 /*! \brief Set each float element to the smallest from two variables.
 260  *
 261  * \param a Any floating-point value
 262  * \param b Any floating-point value
 263  * \return min(a,b) for each element.
 264  *
 265  * \note This function might be superficially meaningless, but it helps us to
 266  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 267  *       outside such code.
 268  */
 269 static inline float min(float a, float b)
 270 {
 271     return std::min(a, b);
 272 }
 273
 274 /*! \brief Float round to nearest integer value (in floating-point format).
 275  *
 276  * \param a Any floating-point value
 277  * \return The nearest integer, represented in floating-point format.
 278  *
 279  * \note This function might be superficially meaningless, but it helps us to
 280  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 281  *       outside such code.
 282  */
 283 static inline float round(float a)
 284 {
 285     return std::round(a);
 286 }
 287
 288 /*! \brief Truncate float, i.e. round towards zero - common hardware instruction.
 289  *
 290  * \param a Any floating-point value
 291  * \return Integer rounded towards zero, represented in floating-point format.
 292  *
 293  * \note This function might be superficially meaningless, but it helps us to
 294  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 295  *       outside such code.
 296  */
 297 static inline float trunc(float a)
 298 {
 299     return std::trunc(a);
 300 }
 301
 302 /*! \brief Return sum of all elements in float variable (i.e., the variable itself).
 303  *
 304  * \param a variable to reduce/sum.
 305  * \return The argument variable itself.
 306  *
 307  * \note This function might be superficially meaningless, but it helps us to
 308  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 309  *       outside such code.
 310  */
 311 static inline float reduce(float a)
 312 {
 313     return a;
 314 }
 315
 316 /*! \brief Bitwise andnot for two scalar float variables.
 317  *
 318  * \param a data1
 319  * \param b data2
 320  * \return (~data1) & data2
 321  *
 322  * \note This function might be superficially meaningless, but it helps us to
 323  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 324  *       outside such code.
 325  */
 326 static inline float andNot(float a, float b)
 327 {
 328     union {
 329         float         r;
 330         std::uint32_t i;
 331     } conv1, conv2;
 332
 333     conv1.r = a;
 334     conv2.r = b;
 335
 336     conv1.i = (~conv1.i) & conv2.i;
 337
 338     return conv1.r;
 339 }
 340
 341 /*! \brief Return true if any bits are set in the float variable.
 342  *
 343  * This function is used to handle bitmasks, mainly for exclusions in the
 344  * inner kernels. Note that it will return true even for -0.0f (sign bit set),
 345  * so it is not identical to not-equal.
 346  *
 347  * \param a value
 348  * \return True if any bit in a is nonzero.
 349  *
 350  * \note This function might be superficially meaningless, but it helps us to
 351  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 352  *       outside such code.
 353  */
 354 static inline bool testBits(float a)
 355 {
 356     union {
 357         std::uint32_t i;
 358         float         f;
 359     } conv;
 360
 361     conv.f = a;
 362     return (conv.i != 0);
 363 }
 364
 365 /*! \brief Returns if the boolean is true.
 366  *
 367  * \param a Logical variable.
 368  * \return true if a is true, otherwise false.
 369  *
 370  * \note This function might be superficially meaningless, but it helps us to
 371  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 372  *       outside such code.
 373  */
 374 static inline bool anyTrue(bool a)
 375 {
 376     return a;
 377 }
 378
 379 /*! \brief Select from single precision variable where boolean is true.
 380  *
 381  * \param a Floating-point variable to select from
 382  * \param mask Boolean selector
 383  * \return  a is selected for true, 0 for false.
 384  *
 385  * \note This function might be superficially meaningless, but it helps us to
 386  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 387  *       outside such code.
 388  */
 389 static inline float selectByMask(float a, bool mask)
 390 {
 391     return mask ? a : 0.0F;
 392 }
 393
 394 /*! \brief Select from single precision variable where boolean is false.
 395  *
 396  * \param a Floating-point variable to select from
 397  * \param mask Boolean selector
 398  * \return  a is selected for false, 0 for true.
 399  *
 400  * \note This function might be superficially meaningless, but it helps us to
 401  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 402  *       outside such code.
 403  */
 404 static inline float selectByNotMask(float a, bool mask)
 405 {
 406     return mask ? 0.0F : a;
 407 }
 408
 409 /*! \brief Blend float selection.
 410  *
 411  * \param a First source
 412  * \param b Second source
 413  * \param sel Boolean selector
 414  * \return Select b if sel is true, a otherwise.
 415  *
 416  * \note This function might be superficially meaningless, but it helps us to
 417  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 418  *       outside such code.
 419  */
 420 static inline float blend(float a, float b, bool sel)
 421 {
 422     return sel ? b : a;
 423 }
 424
 425 /*! \brief Round single precision floating point to integer.
 426  *
 427  * \param a float
 428  * \return Integer format, a rounded to nearest integer.
 429  *
 430  * \note This function might be superficially meaningless, but it helps us to
 431  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 432  *       outside such code.
 433  */
 434 static inline std::int32_t cvtR2I(float a)
 435 {
 436     return static_cast<std::int32_t>(std::round(a));
 437 };
 438
 439 /*! \brief Truncate single precision floating point to integer.
 440  *
 441  * \param a float
 442  * \return Integer format, a truncated to integer.
 443  *
 444  * \note This function might be superficially meaningless, but it helps us to
 445  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 446  *       outside such code.
 447  */
 448 static inline std::int32_t cvttR2I(float a)
 449 {
 450     return static_cast<std::int32_t>(std::trunc(a));
 451 };
 452
 453 /*! \brief Return integer.
 454  *
 455  * This function mimicks the SIMD integer-to-real conversion routines. By
 456  * simply returning an integer, we let the compiler sort out whether the
 457  * conversion should be to float or double rather than using proxy objects.
 458  *
 459  * \param a integer
 460  * \return same value (a)
 461  *
 462  * \note This function might be superficially meaningless, but it helps us to
 463  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 464  *       outside such code.
 465  */
 466 static inline std::int32_t cvtI2R(std::int32_t a)
 467 {
 468     return a;
 469 }
 470
 471 /************************************************************************
 472  *   Double-precision floating point functions mimicking SIMD versions  *
 473  ************************************************************************/
 474
 475 /*! \brief Store contents of double variable to aligned memory m.
 476  *
 477  * \param[out] m Pointer to memory.
 478  * \param a double variable to store
 479  *
 480  * \note This function might be superficially meaningless, but it helps us to
 481  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 482  *       outside such code.
 483  */
 484 static inline void store(double* m, double a)
 485 {
 486     *m = a;
 487 }
 488
 489 /*! \brief Store contents of double variable to unaligned memory m.
 490  *
 491  * \param[out] m Pointer to memory, no alignment requirement.
 492  * \param a double variable to store.
 493  *
 494  * \note This function might be superficially meaningless, but it helps us to
 495  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 496  *       outside such code.
 497  */
 498 static inline void storeU(double* m, double a)
 499 {
 500     *m = a;
 501 }
 502
// We cannot overload the bitwise logical operators (and, or, xor) for
// built-in types, so they are not provided here; andNot is an ordinary
// function and is defined further down.
 505
 506 /*! \brief double Fused-multiply-add. Result is a*b + c.
 507  *
 508  * \param a factor1
 509  * \param b factor2
 510  * \param c term
 511  * \return a*b + c
 512  *
 513  * \note This function might be superficially meaningless, but it helps us to
 514  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 515  *       outside such code.
 516  */
 517 static inline double fma(double a, double b, double c)
 518 {
 519     // Note that we purposely do not use the single-rounding std::fma
 520     // as that can be very slow without hardware support
 521     return a * b + c;
 522 }
 523
 524 /*! \brief double Fused-multiply-subtract. Result is a*b - c.
 525  *
 526  * \param a factor1
 527  * \param b factor2
 528  * \param c term
 529  * \return a*b - c
 530  *
 531  * \note This function might be superficially meaningless, but it helps us to
 532  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 533  *       outside such code.
 534  */
 535 static inline double fms(double a, double b, double c)
 536 {
 537     return a * b - c;
 538 }
 539
 540 /*! \brief double Fused-negated-multiply-add. Result is - a*b + c.
 541  *
 542  * \param a factor1
 543  * \param b factor2
 544  * \param c term
 545  * \return -a*b + c
 546  *
 547  * \note This function might be superficially meaningless, but it helps us to
 548  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 549  *       outside such code.
 550  */
 551 static inline double fnma(double a, double b, double c)
 552 {
 553     return c - a * b;
 554 }
 555
 556 /*! \brief double Fused-negated-multiply-subtract. Result is -a*b - c.
 557  *
 558  * \param a factor1
 559  * \param b factor2
 560  * \param c term
 561  * \return -a*b - c
 562  *
 563  * \note This function might be superficially meaningless, but it helps us to
 564  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 565  *       outside such code.
 566  */
 567 static inline double fnms(double a, double b, double c)
 568 {
 569     return -a * b - c;
 570 }
 571
 572 /*! \brief Add two double variables, masked version.
 573  *
 574  * \param a term1
 575  * \param b term2
 576  * \param m mask
 577  * \return a+b where mask is true, a otherwise.
 578  *
 579  * \note This function might be superficially meaningless, but it helps us to
 580  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 581  *       outside such code.
 582  */
 583 static inline double maskAdd(double a, double b, double m)
 584 {
 585     return a + (m != 0.0 ? b : 0.0);
 586 }
 587
 588 /*! \brief Multiply two double variables, masked version.
 589  *
 590  * \param a factor1
 591  * \param b factor2
 592  * \param m mask
 593  * \return a*b where mask is true, 0.0 otherwise.
 594  *
 595  * \note This function might be superficially meaningless, but it helps us to
 596  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 597  *       outside such code.
 598  */
 599 static inline double maskzMul(double a, double b, double m)
 600 {
 601     return m != 0.0 ? (a * b) : 0.0;
 602 }
 603
 604 /*! \brief double fused multiply-add, masked version.
 605  *
 606  * \param a factor1
 607  * \param b factor2
 608  * \param c term
 609  * \param m mask
 610  * \return a*b+c where mask is true, 0.0 otherwise.
 611  *
 612  * \note This function might be superficially meaningless, but it helps us to
 613  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 614  *       outside such code.
 615  */
 616 static inline double maskzFma(double a, double b, double c, double m)
 617 {
 618     return m != 0.0 ? (a * b + c) : 0.0;
 619 }
 620
 621 /*! \brief Double 1.0/x, masked version.
 622  *
 623  * \param x Argument, x>0 for entries where mask is true.
 624  * \param m Mask
 625  * \return Approximation of 1/x. The result for masked-out entries will be 0.0.
 626  *
 627  * \note This function might be superficially meaningless, but it helps us to
 628  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 629  *       outside such code.
 630  */
 631 static inline double gmx_simdcall maskzRcp(double x, double m)
 632 {
 633     return m != 0.0 ? 1.0 / x : 0.0;
 634 }
 635
 636 /*! \brief double doubleing-point abs().
 637  *
 638  * \param a any doubleing point values
 639  * \return abs(a) for each element.
 640  *
 641  * \note This function might be superficially meaningless, but it helps us to
 642  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 643  *       outside such code.
 644  */
 645 static inline double abs(double a)
 646 {
 647     return std::abs(a);
 648 }
 649
 650 /*! \brief Set each double element to the largest from two variables.
 651  *
 652  * \param a Any doubleing-point value
 653  * \param b Any doubleing-point value
 654  * \return max(a,b) for each element.
 655  *
 656  * \note This function might be superficially meaningless, but it helps us to
 657  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 658  *       outside such code.
 659  */
 660 static inline double max(double a, double b)
 661 {
 662     return std::max(a, b);
 663 }
 664
 665 /*! \brief Set each double element to the smallest from two variables.
 666  *
 667  * \param a Any doubleing-point value
 668  * \param b Any doubleing-point value
 669  * \return min(a,b) for each element.
 670  *
 671  * \note This function might be superficially meaningless, but it helps us to
 672  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 673  *       outside such code.
 674  */
 675 static inline double min(double a, double b)
 676 {
 677     return std::min(a, b);
 678 }
 679
 680 /*! \brief double round to nearest integer value (in doubleing-point format).
 681  *
 682  * \param a Any doubleing-point value
 683  * \return The nearest integer, represented in doubleing-point format.
 684  *
 685  * \note This function might be superficially meaningless, but it helps us to
 686  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 687  *       outside such code.
 688  */
 689 static inline double round(double a)
 690 {
 691     return std::round(a);
 692 }
 693
 694 /*! \brief Truncate double, i.e. round towards zero - common hardware instruction.
 695  *
 696  * \param a Any doubleing-point value
 697  * \return Integer rounded towards zero, represented in doubleing-point format.
 698  *
 699  * \note This function might be superficially meaningless, but it helps us to
 700  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 701  *       outside such code.
 702  */
 703 static inline double trunc(double a)
 704 {
 705     return std::trunc(a);
 706 }
 707
 708 /*! \brief Return sum of all elements in double variable (i.e., the variable itself).
 709  *
 710  * \param a variable to reduce/sum.
 711  * \return The argument variable itself.
 712  *
 713  * \note This function might be superficially meaningless, but it helps us to
 714  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 715  *       outside such code.
 716  */
 717 static inline double reduce(double a)
 718 {
 719     return a;
 720 }
 721
 722 /*! \brief Bitwise andnot for two scalar double variables.
 723  *
 724  * \param a data1
 725  * \param b data2
 726  * \return (~data1) & data2
 727  *
 728  * \note This function might be superficially meaningless, but it helps us to
 729  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 730  *       outside such code.
 731  */
 732 static inline double andNot(double a, double b)
 733 {
 734     union {
 735         double        r;
 736         std::uint64_t i;
 737     } conv1, conv2;
 738
 739     conv1.r = a;
 740     conv2.r = b;
 741
 742     conv1.i = (~conv1.i) & conv2.i;
 743
 744     return conv1.r;
 745 }
 746
 747 /*! \brief Return true if any bits are set in the double variable.
 748  *
 749  * This function is used to handle bitmasks, mainly for exclusions in the
 750  * inner kernels. Note that it will return true even for -0.0 (sign bit set),
 751  * so it is not identical to not-equal.
 752  *
 753  * \param a value
 754  * \return True if any bit in a is nonzero.
 755  *
 756  * \note This function might be superficially meaningless, but it helps us to
 757  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 758  *       outside such code.
 759  */
 760 static inline bool testBits(double a)
 761 {
 762     union {
 763         std::uint64_t i;
 764         double        f;
 765     } conv;
 766
 767     conv.f = a;
 768     return (conv.i != 0);
 769 }
 770
 771 /*! \brief Select from double precision variable where boolean is true.
 772  *
 773  * \param a double variable to select from
 774  * \param mask Boolean selector
 775  * \return  a is selected for true, 0 for false.
 776  *
 777  * \note This function might be superficially meaningless, but it helps us to
 778  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 779  *       outside such code.
 780  */
 781 static inline double selectByMask(double a, bool mask)
 782 {
 783     return mask ? a : 0.0;
 784 }
 785
 786 /*! \brief Select from double precision variable where boolean is false.
 787  *
 788  * \param a double variable to select from
 789  * \param mask Boolean selector
 790  * \return  a is selected for false, 0 for true.
 791  *
 792  * \note This function might be superficially meaningless, but it helps us to
 793  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 794  *       outside such code.
 795  */
 796 static inline double selectByNotMask(double a, bool mask)
 797 {
 798     return mask ? 0.0 : a;
 799 }
 800
 801 /*! \brief Blend double selection.
 802  *
 803  * \param a First source
 804  * \param b Second source
 805  * \param sel Boolean selector
 806  * \return Select b if sel is true, a otherwise.
 807  *
 808  * \note This function might be superficially meaningless, but it helps us to
 809  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 810  *       outside such code.
 811  */
 812 static inline double blend(double a, double b, bool sel)
 813 {
 814     return sel ? b : a;
 815 }
 816
 817 /*! \brief Round single precision doubleing point to integer.
 818  *
 819  * \param a double
 820  * \return Integer format, a rounded to nearest integer.
 821  *
 822  * \note This function might be superficially meaningless, but it helps us to
 823  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 824  *       outside such code.
 825  */
 826 static inline std::int32_t cvtR2I(double a)
 827 {
 828     return static_cast<std::int32_t>(std::round(a));
 829 };
 830
 831 /*! \brief Truncate single precision doubleing point to integer.
 832  *
 833  * \param a double
 834  * \return Integer format, a truncated to integer.
 835  *
 836  * \note This function might be superficially meaningless, but it helps us to
 837  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 838  *       outside such code.
 839  */
 840 static inline std::int32_t cvttR2I(double a)
 841 {
 842     return static_cast<std::int32_t>(std::trunc(a));
 843 };
 844
 845 // We do not have a separate cvtI2R for double, since that would require
 846 // proxy objects. Instead, the float version returns an integer and lets the
 847 // compiler sort out the conversion type.
 848
 849
 850 /*! \brief Convert float to double (mimicks SIMD conversion)
 851  *
 852  * \param a float
 853  * \return a, as double double
 854  *
 855  * \note This function might be superficially meaningless, but it helps us to
 856  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 857  *       outside such code.
 858  */
 859 static inline double cvtF2D(float a)
 860 {
 861     return a;
 862 }
 863
 864 /*! \brief Convert double to float (mimicks SIMD conversion)
 865  *
 866  * \param a double
 867  * \return a, as float
 868  *
 869  * \note This function might be superficially meaningless, but it helps us to
 870  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 871  *       outside such code.
 872  */
 873 static inline float cvtD2F(double a)
 874 {
 875     return a;
 876 }
 877
 878 /************************************************
 879  *   Integer functions mimicking SIMD versions  *
 880  ************************************************/
 881
 882 /*! \brief Store contents of integer variable to aligned memory m.
 883  *
 884  * \param[out] m Pointer to memory.
 885  * \param a integer variable to store
 886  *
 887  * \note This function might be superficially meaningless, but it helps us to
 888  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 889  *       outside such code.
 890  */
 891 static inline void store(std::int32_t* m, std::int32_t a)
 892 {
 893     *m = a;
 894 }
 895
 896 /*! \brief Store contents of integer variable to unaligned memory m.
 897  *
 898  * \param[out] m Pointer to memory, no alignment requirement.
 899  * \param a integer variable to store.
 900  *
 901  * \note This function might be superficially meaningless, but it helps us to
 902  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 903  *       outside such code.
 904  */
 905 static inline void storeU(std::int32_t* m, std::int32_t a)
 906 {
 907     *m = a;
 908 }
 909
 910 /*! \brief Bitwise andnot for two scalar integer variables.
 911  *
 912  * \param a data1
 913  * \param b data2
 914  * \return (~data1) & data2
 915  *
 916  * \note This function might be superficially meaningless, but it helps us to
 917  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 918  *       outside such code.
 919  */
 920 static inline std::int32_t andNot(std::int32_t a, std::int32_t b)
 921 {
 922     return ~a & b;
 923 }
 924
 925 /*! \brief Return true if any bits are set in the integer variable.
 926  *
 927  * This function is used to handle bitmasks, mainly for exclusions in the
 928  * inner kernels.
 929  *
 930  * \param a value
 931  * \return True if any bit in a is nonzero.
 932  *
 933  * \note This function might be superficially meaningless, but it helps us to
 934  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 935  *       outside such code.
 936  */
 937 static inline bool testBits(std::int32_t a)
 938 {
 939     return (a != 0);
 940 }
 941
 942 /*! \brief Select from integer variable where boolean is true.
 943  *
 944  * \param a Integer variable to select from
 945  * \param mask Boolean selector
 946  * \return  a is selected for true, 0 for false.
 947  *
 948  * \note This function might be superficially meaningless, but it helps us to
 949  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 950  *       outside such code.
 951  */
 952 static inline std::int32_t selectByMask(std::int32_t a, bool mask)
 953 {
 954     return mask ? a : 0;
 955 }
 956
 957 /*! \brief Select from integer variable where boolean is false.
 958  *
 959  * \param a Integer variable to select from
 960  * \param mask Boolean selector
 961  * \return  a is selected for false, 0 for true.
 962  *
 963  * \note This function might be superficially meaningless, but it helps us to
 964  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 965  *       outside such code.
 966  */
 967 static inline std::int32_t selectByNotMask(std::int32_t a, bool mask)
 968 {
 969     return mask ? 0 : a;
 970 }
 971
 972 /*! \brief Blend integer selection.
 973  *
 974  * \param a First source
 975  * \param b Second source
 976  * \param sel Boolean selector
 977  * \return Select b if sel is true, a otherwise.
 978  *
 979  * \note This function might be superficially meaningless, but it helps us to
 980  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 981  *       outside such code.
 982  */
 983 static inline std::int32_t blend(std::int32_t a, std::int32_t b, bool sel)
 984 {
 985     return sel ? b : a;
 986 }
 987
 988 /*! \brief Just return a boolean (mimicks SIMD real-to-int bool conversions)
 989  *
 990  * \param a  boolean
 991  * \return same boolean
 992  *
 993  * \note This function might be superficially meaningless, but it helps us to
 994  *       write templated SIMD/non-SIMD code. For clarity it should not be used
 995  *       outside such code.
 996  */
 997 static inline bool cvtB2IB(bool a)
 998 {
 999     return a;
1000 }
1001
/*! \brief Return the boolean unchanged (mimics SIMD int-to-real bool conversions).
 *
 * \param a boolean
 * \return The same boolean, \p a.
 *
 * \note Trivial for scalars; it exists so that code templated on a SIMD or
 *       scalar type can use the same function name. For clarity, do not
 *       use it outside such templated code.
 */
static inline bool cvtIB2B(bool a)
{
    const bool unchanged = a;
    return unchanged;
}
1015
1016 } // namespace gmx
1017
1018
#endif // GMX_SIMD_SCALAR_H