src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
  38
  39 #include "config.h"
  40
  41 #include "gromacs/utility/basedefinitions.h"
  42
  43 #include "impl_ibm_vsx_definitions.h"
  44
  45 namespace gmx
  46 {
  47
  48 class SimdDouble
  49 {
  50     public:
  51         SimdDouble() {}
  52
  53         // gcc-4.9 does not recognize that we use the parameter
  54         SimdDouble(double gmx_unused d) : simdInternal_(vec_splats(d)) {}
  55
  56         // Internal utility constructor to simplify return statements
  57         SimdDouble(__vector double simd) : simdInternal_(simd) {}
  58
  59         __vector double  simdInternal_;
  60 };
  61
  62 class SimdDInt32
  63 {
  64     public:
  65         SimdDInt32() {}
  66
  67         // gcc-4.9 does not recognize that we use the parameter
  68         SimdDInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
  69
  70         // Internal utility constructor to simplify return statements
  71         SimdDInt32(__vector signed int simd) : simdInternal_(simd) {}
  72
  73         __vector signed int  simdInternal_;
  74 };
  75
  76 class SimdDBool
  77 {
  78     public:
  79         SimdDBool() {}
  80
  81         SimdDBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool long long>(vec_splats( b ? 0xFFFFFFFFFFFFFFFFULL : 0))) {}
  82
  83         // Internal utility constructor to simplify return statements
  84         SimdDBool(__vector vsxBool long long simd) : simdInternal_(simd) {}
  85
  86         __vector vsxBool long long simdInternal_;
  87 };
  88
  89 class SimdDIBool
  90 {
  91     public:
  92         SimdDIBool() {}
  93
  94         SimdDIBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats( b ? 0xFFFFFFFF : 0))) {}
  95
  96         // Internal utility constructor to simplify return statements
  97         SimdDIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
  98
  99         __vector vsxBool int  simdInternal_;
 100 };
 101
 102 // The VSX load & store operations are a bit of a mess. The interface is different
 103 // for xlc version 12, xlc version 13, and gcc. Long-term IBM recommends
 104 // simply using pointer dereferencing both for aligned and unaligned loads.
 105 // That's nice, but unfortunately xlc still bugs out when the pointer is
 106 // not aligned. Sticking to vec_xl/vec_xst isn't a solution either, since
 107 // that appears to be buggy for some _aligned_ loads :-)
 108 //
 109 // For now, we use pointer dereferencing for all aligned load/stores, and
 110 // for unaligned ones with gcc. On xlc we use vec_xlw4/vec_xstw4 for
 111 // unaligned memory operations. The latest docs recommend using the overloaded
 112 // vec_xl/vec_xst, but that is not supported on xlc version 12. We'll
 113 // revisit things once xlc is a bit more stable - for now you probably want
 114 // to stick to gcc...
 115
 116 static inline SimdDouble gmx_simdcall
 117 simdLoad(const double *m)
 118 {
 119     return {
 120                *reinterpret_cast<const __vector double *>(m)
 121     };
 122 }
 123
 124 static inline void gmx_simdcall
 125 store(double *m, SimdDouble a)
 126 {
 127     *reinterpret_cast<__vector double *>(m) = a.simdInternal_;
 128 }
 129
 130 static inline SimdDouble gmx_simdcall
 131 simdLoadU(const double *m)
 132 {
 133 #if defined(__ibmxl__) || defined(__xlC__)
 134     return {
 135                vec_xlw4(0, const_cast<double *>(m))
 136     }
 137 #else
 138     return {
 139                *reinterpret_cast<const __vector double *>(m)
 140     };
 141 #endif
 142 }
 143
 144 static inline void gmx_simdcall
 145 storeU(double *m, SimdDouble a)
 146 {
 147 #if defined(__ibmxl__) || defined(__xlC__)
 148     vec_xstw4(a.simdInternal_, 0, m);
 149 #else
 150     *reinterpret_cast<__vector double *>(m) = a.simdInternal_;
 151 #endif
 152 }
 153
 154 static inline SimdDouble gmx_simdcall
 155 setZeroD()
 156 {
 157     return {
 158                vec_splats(0.0)
 159     };
 160 }
 161
 162 static inline SimdDInt32 gmx_simdcall
 163 simdLoadDI(const std::int32_t * m)
 164 {
 165     __vector signed int          t0, t1;
 166     const __vector unsigned char perm = { 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19 };
 167     t0 = vec_splats(m[0]);
 168     t1 = vec_splats(m[1]);
 169     return {
 170                vec_perm(t0, t1, perm)
 171     };
 172 }
 173
 174 // gcc-4.9 does not understand that arguments to vec_extract() are used
 175 static inline void gmx_simdcall
 176 store(std::int32_t * m, SimdDInt32 gmx_unused x)
 177 {
 178     m[0] = vec_extract(x.simdInternal_, 0);
 179     m[1] = vec_extract(x.simdInternal_, 2);
 180 }
 181
 182 static inline SimdDInt32 gmx_simdcall
 183 simdLoadUDI(const std::int32_t *m)
 184 {
 185     return simdLoadDI(m);
 186 }
 187
 188 static inline void gmx_simdcall
 189 storeU(std::int32_t * m, SimdDInt32 a)
 190 {
 191     return store(m, a);
 192 }
 193
 194 static inline SimdDInt32 gmx_simdcall
 195 setZeroDI()
 196 {
 197     return {
 198                vec_splats(static_cast<int>(0))
 199     };
 200 }
 201
 202 // gcc-4.9 does not detect that vec_extract() uses its argument
 203 template<int index>
 204 static inline std::int32_t gmx_simdcall
 205 extract(SimdDInt32 gmx_unused a)
 206 {
 207     return vec_extract(a.simdInternal_, 2*index);
 208 }
 209
 210 static inline SimdDouble gmx_simdcall
 211 operator&(SimdDouble a, SimdDouble b)
 212 {
 213     return {
 214                vec_and(a.simdInternal_, b.simdInternal_)
 215     };
 216 }
 217
 218 static inline SimdDouble gmx_simdcall
 219 andNot(SimdDouble a, SimdDouble b)
 220 {
 221     return {
 222                vec_andc(b.simdInternal_, a.simdInternal_)
 223     };
 224 }
 225
 226 static inline SimdDouble gmx_simdcall
 227 operator|(SimdDouble a, SimdDouble b)
 228 {
 229     return {
 230                vec_or(a.simdInternal_, b.simdInternal_)
 231     };
 232 }
 233
 234 static inline SimdDouble gmx_simdcall
 235 operator^(SimdDouble a, SimdDouble b)
 236 {
 237     return {
 238                vec_xor(a.simdInternal_, b.simdInternal_)
 239     };
 240 }
 241
 242 static inline SimdDouble gmx_simdcall
 243 operator+(SimdDouble a, SimdDouble b)
 244 {
 245     return {
 246                vec_add(a.simdInternal_, b.simdInternal_)
 247     };
 248 }
 249
 250 static inline SimdDouble gmx_simdcall
 251 operator-(SimdDouble a, SimdDouble b)
 252 {
 253     return {
 254                vec_sub(a.simdInternal_, b.simdInternal_)
 255     };
 256 }
 257
 258 static inline SimdDouble gmx_simdcall
 259 operator-(SimdDouble x)
 260 {
 261     return {
 262                -x.simdInternal_
 263     };
 264 }
 265
 266 static inline SimdDouble gmx_simdcall
 267 operator*(SimdDouble a, SimdDouble b)
 268 {
 269     return {
 270                vec_mul(a.simdInternal_, b.simdInternal_)
 271     };
 272 }
 273
 274 static inline SimdDouble gmx_simdcall
 275 fma(SimdDouble a, SimdDouble b, SimdDouble c)
 276 {
 277     return {
 278                vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 279     };
 280 }
 281
 282 static inline SimdDouble gmx_simdcall
 283 fms(SimdDouble a, SimdDouble b, SimdDouble c)
 284 {
 285     return {
 286                vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 287     };
 288 }
 289
 290 static inline SimdDouble gmx_simdcall
 291 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 292 {
 293     return {
 294                vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 295     };
 296 }
 297
 298 static inline SimdDouble gmx_simdcall
 299 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 300 {
 301     return {
 302                vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 303     };
 304 }
 305
 306 static inline SimdDouble gmx_simdcall
 307 rsqrt(SimdDouble x)
 308 {
 309     return {
 310                vec_rsqrte(x.simdInternal_)
 311     };
 312 }
 313
 314 static inline SimdDouble gmx_simdcall
 315 rcp(SimdDouble x)
 316 {
 317     return {
 318                vec_re(x.simdInternal_)
 319     };
 320 }
 321
 322 static inline SimdDouble gmx_simdcall
 323 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 324 {
 325     return {
 326                vec_add(a.simdInternal_, vec_and(b.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)))
 327     };
 328 }
 329
 330 static inline SimdDouble gmx_simdcall
 331 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 332 {
 333     SimdDouble prod = a * b;
 334
 335     return {
 336                vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))
 337     };
 338 }
 339
 340 static inline SimdDouble gmx_simdcall
 341 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 342 {
 343     SimdDouble prod = fma(a, b, c);
 344
 345     return {
 346                vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))
 347     };
 348 }
 349
 350 static inline SimdDouble gmx_simdcall
 351 maskzRsqrt(SimdDouble x, SimdDBool m)
 352 {
 353 #ifndef NDEBUG
 354     x.simdInternal_ = vec_sel(vec_splats(1.0f), x.simdInternal_, m.simdInternal_);
 355 #endif
 356     return {
 357                vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_))
 358     };
 359 }
 360
 361 static inline SimdDouble gmx_simdcall
 362 maskzRcp(SimdDouble x, SimdDBool m)
 363 {
 364 #ifndef NDEBUG
 365     x.simdInternal_ = vec_sel(vec_splats(1.0f), x.simdInternal_, m.simdInternal_);
 366 #endif
 367     return {
 368                vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_))
 369     };
 370 }
 371
 372 static inline SimdDouble gmx_simdcall
 373 abs(SimdDouble x)
 374 {
 375     return {
 376                vec_abs( x.simdInternal_ )
 377     };
 378 }
 379
 380 static inline SimdDouble gmx_simdcall
 381 max(SimdDouble a, SimdDouble b)
 382 {
 383     return {
 384                vec_max(a.simdInternal_, b.simdInternal_)
 385     };
 386 }
 387
 388 static inline SimdDouble gmx_simdcall
 389 min(SimdDouble a, SimdDouble b)
 390 {
 391     return {
 392                vec_min(a.simdInternal_, b.simdInternal_)
 393     };
 394 }
 395
 396 static inline SimdDouble gmx_simdcall
 397 round(SimdDouble x)
 398 {
 399 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 400 // gcc up to at least version 4.9 does not have vec_round() in double precision - use inline asm
 401     __vector double res;
 402     __asm__ ("xvrdpi %x0,%x1" : "=wd" (res) : "wd" (x.simdInternal_));
 403     return {
 404                res
 405     };
 406 #else
 407     return {
 408                vec_round( x.simdInternal_ )
 409     };
 410 #endif
 411 }
 412
 413 static inline SimdDouble gmx_simdcall
 414 trunc(SimdDouble x)
 415 {
 416     return {
 417                vec_trunc( x.simdInternal_ )
 418     };
 419 }
 420
 421 static inline SimdDouble
 422 frexp(SimdDouble value, SimdDInt32 * exponent)
 423 {
 424     const __vector double     exponentMask = reinterpret_cast<__vector double>(vec_splats(0x7FF0000000000000ULL));
 425     const __vector signed int exponentBias = vec_splats(1022);
 426     const __vector double     half         = vec_splats(0.5);
 427     __vector signed int       iExponent;
 428
 429     iExponent               = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
 430     // The data is in the upper half of each double (corresponding to elements 1 and 3).
 431     // First shift 52-32=20bits, and then permute to swap element 0 with 1 and element 2 with 3
 432     // For big endian they are in opposite order, so then we simply skip the swap.
 433     iExponent               = vec_sr(iExponent, vec_splats(20U));
 434 #ifndef __BIG_ENDIAN__
 435     const __vector unsigned char perm = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
 436     iExponent               = vec_perm(iExponent, iExponent, perm);
 437 #endif
 438     iExponent               = vec_sub(iExponent, exponentBias);
 439     exponent->simdInternal_ = iExponent;
 440
 441     return {
 442                vec_or(vec_andc(value.simdInternal_, exponentMask), half)
 443     };
 444 }
 445
 446 static inline SimdDouble
 447 ldexp(SimdDouble value, SimdDInt32 exponent)
 448 {
 449     const __vector signed int    exponentBias = vec_splats(1023);
 450     __vector signed int          iExponent;
 451 #ifdef __BIG_ENDIAN__
 452     const __vector unsigned char perm = {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 16, 17, 18, 19};
 453 #else
 454     const __vector unsigned char perm = {16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11};
 455 #endif
 456
 457     iExponent = vec_add(exponent.simdInternal_, exponentBias);
 458     // exponent is now present in pairs of integers; 0011.
 459     // Elements 0/2 already correspond to the upper half of each double,
 460     // so we only need to shift by another 52-32=20 bits.
 461     // The remaining elements are set to zero.
 462     iExponent = vec_sl(iExponent, vec_splats(20U));
 463     iExponent = vec_perm(iExponent, vec_splats(0), perm);
 464
 465     return {
 466                vec_mul(value.simdInternal_, reinterpret_cast<__vector double>(iExponent))
 467     };
 468 }
 469
 470 static inline double gmx_simdcall
 471 reduce(SimdDouble x)
 472 {
 473     const __vector unsigned char perm = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
 474 #ifdef __xlC__
 475     /* old xlc version 12 does not understand vec_perm() with double arguments */
 476     x.simdInternal_ = vec_add(x.simdInternal_,
 477                               reinterpret_cast<__vector double>(vec_perm(reinterpret_cast<__vector signed int>(x.simdInternal_),
 478                                                                          reinterpret_cast<__vector signed int>(x.simdInternal_), perm)));
 479 #else
 480     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm));
 481 #endif
 482     return vec_extract(x.simdInternal_, 0);
 483 }
 484
 485 static inline SimdDBool gmx_simdcall
 486 operator==(SimdDouble a, SimdDouble b)
 487 {
 488     return {
 489                vec_cmpeq(a.simdInternal_, b.simdInternal_)
 490     };
 491 }
 492
 493 static inline SimdDBool gmx_simdcall
 494 operator!=(SimdDouble a, SimdDouble b)
 495 {
 496     return {
 497                reinterpret_cast<__vector vsxBool long long>(vec_or(reinterpret_cast<__vector signed int>(vec_cmpgt(a.simdInternal_, b.simdInternal_)),
 498                                                                    reinterpret_cast<__vector signed int>(vec_cmplt(a.simdInternal_, b.simdInternal_))))
 499     };
 500 }
 501
 502 static inline SimdDBool gmx_simdcall
 503 operator<(SimdDouble a, SimdDouble b)
 504 {
 505     return {
 506                vec_cmplt(a.simdInternal_, b.simdInternal_)
 507     };
 508 }
 509
 510 static inline SimdDBool gmx_simdcall
 511 operator<=(SimdDouble a, SimdDouble b)
 512 {
 513     return {
 514                vec_cmple(a.simdInternal_, b.simdInternal_)
 515     };
 516 }
 517
 518 static inline SimdDBool gmx_simdcall
 519 testBits(SimdDouble a)
 520 {
 521 #ifdef __POWER8_VECTOR__
 522     return {
 523                vec_cmpgt(reinterpret_cast<__vector unsigned long long>(a.simdInternal_), vec_splats(0ULL))
 524     };
 525 #else
 526     return {
 527                reinterpret_cast<__vector vsxBool long long>(vec_nor(reinterpret_cast<__vector signed int>(vec_cmpeq(a.simdInternal_, vec_splats(0.0))), vec_splats(0)))
 528     };
 529 #endif
 530 }
 531
 532 static inline SimdDBool gmx_simdcall
 533 operator&&(SimdDBool a, SimdDBool b)
 534 {
 535     return {
 536                reinterpret_cast<__vector vsxBool long long>(vec_and(reinterpret_cast<__vector signed int>(a.simdInternal_), reinterpret_cast<__vector signed int>(b.simdInternal_)))
 537     };
 538 }
 539
 540 static inline SimdDBool gmx_simdcall
 541 operator||(SimdDBool a, SimdDBool b)
 542 {
 543     return {
 544                reinterpret_cast<__vector vsxBool long long>(vec_or(reinterpret_cast<__vector signed int>(a.simdInternal_), reinterpret_cast<__vector signed int>(b.simdInternal_)))
 545     };
 546 }
 547
 548 static inline bool gmx_simdcall
 549 anyTrue(SimdDBool a)
 550 {
 551     return vec_any_ne(reinterpret_cast<__vector vsxBool int>(a.simdInternal_), reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 552 }
 553
 554 static inline SimdDouble gmx_simdcall
 555 selectByMask(SimdDouble a, SimdDBool m)
 556 {
 557     return {
 558                vec_and(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))
 559     };
 560 }
 561
 562 static inline SimdDouble gmx_simdcall
 563 selectByNotMask(SimdDouble a, SimdDBool m)
 564 {
 565     return {
 566                vec_andc(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))
 567     };
 568 }
 569
 570 static inline SimdDouble gmx_simdcall
 571 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
 572 {
 573     return {
 574                vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 575     };
 576 }
 577
 578 static inline SimdDInt32 gmx_simdcall
 579 operator<<(SimdDInt32 a, int n)
 580 {
 581     return {
 582                vec_sl(a.simdInternal_, vec_splats(static_cast<unsigned int>(n)))
 583     };
 584 }
 585
 586 static inline SimdDInt32 gmx_simdcall
 587 operator>>(SimdDInt32 a, int n)
 588 {
 589     return {
 590                vec_sr(a.simdInternal_, vec_splats(static_cast<unsigned int>(n)))
 591     };
 592 }
 593
 594 static inline SimdDInt32 gmx_simdcall
 595 operator&(SimdDInt32 a, SimdDInt32 b)
 596 {
 597     return {
 598                vec_and(a.simdInternal_, b.simdInternal_)
 599     };
 600 }
 601
 602 static inline SimdDInt32 gmx_simdcall
 603 andNot(SimdDInt32 a, SimdDInt32 b)
 604 {
 605     return {
 606                vec_andc(b.simdInternal_, a.simdInternal_)
 607     };
 608 }
 609
 610 static inline SimdDInt32 gmx_simdcall
 611 operator|(SimdDInt32 a, SimdDInt32 b)
 612 {
 613     return {
 614                vec_or(a.simdInternal_, b.simdInternal_)
 615     };
 616 }
 617
 618 static inline SimdDInt32 gmx_simdcall
 619 operator^(SimdDInt32 a, SimdDInt32 b)
 620 {
 621     return {
 622                vec_xor(a.simdInternal_, b.simdInternal_)
 623     };
 624 }
 625
 626 static inline SimdDInt32 gmx_simdcall
 627 operator+(SimdDInt32 a, SimdDInt32 b)
 628 {
 629     return {
 630                vec_add(a.simdInternal_, b.simdInternal_)
 631     };
 632 }
 633
 634 static inline SimdDInt32 gmx_simdcall
 635 operator-(SimdDInt32 a, SimdDInt32 b)
 636 {
 637     return {
 638                vec_sub(a.simdInternal_, b.simdInternal_)
 639     };
 640 }
 641
 642 static inline SimdDInt32 gmx_simdcall
 643 operator*(SimdDInt32 a, SimdDInt32 b)
 644 {
 645     return {
 646                a.simdInternal_ * b.simdInternal_
 647     };
 648 }
 649
 650 static inline SimdDIBool gmx_simdcall
 651 operator==(SimdDInt32 a, SimdDInt32 b)
 652 {
 653     return {
 654                vec_cmpeq(a.simdInternal_, b.simdInternal_)
 655     };
 656 }
 657
 658 static inline SimdDIBool gmx_simdcall
 659 testBits(SimdDInt32 a)
 660 {
 661     return {
 662                vec_cmpgt( reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U))
 663     };
 664 }
 665
 666 static inline SimdDIBool gmx_simdcall
 667 operator<(SimdDInt32 a, SimdDInt32 b)
 668 {
 669     return {
 670                vec_cmplt(a.simdInternal_, b.simdInternal_)
 671     };
 672 }
 673
 674 static inline SimdDIBool gmx_simdcall
 675 operator&&(SimdDIBool a, SimdDIBool b)
 676 {
 677     return {
 678                vec_and(a.simdInternal_, b.simdInternal_)
 679     };
 680 }
 681
 682 static inline SimdDIBool gmx_simdcall
 683 operator||(SimdDIBool a, SimdDIBool b)
 684 {
 685     return {
 686                vec_or(a.simdInternal_, b.simdInternal_)
 687     };
 688 }
 689
 690 static inline bool gmx_simdcall
 691 anyTrue(SimdDIBool a)
 692 {
 693     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 694 }
 695
 696 static inline SimdDInt32 gmx_simdcall
 697 selectByMask(SimdDInt32 a, SimdDIBool m)
 698 {
 699     return {
 700                vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_))
 701     };
 702 }
 703
 704 static inline SimdDInt32 gmx_simdcall
 705 selectByNotMask(SimdDInt32 a, SimdDIBool m)
 706 {
 707     return {
 708                vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_))
 709     };
 710 }
 711
 712 static inline SimdDInt32 gmx_simdcall
 713 blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
 714 {
 715     return {
 716                vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 717     };
 718 }
 719
 720 static inline SimdDInt32 gmx_simdcall
 721 cvttR2I(SimdDouble a)
 722 {
 723 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 724 // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
 725     const __vector unsigned char perm = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
 726     __vector double              ix;
 727
 728     __asm__ ("xvcvdpsxws %x0,%x1" : "=wa" (ix) : "wd" (a.simdInternal_));
 729
 730     return {
 731                reinterpret_cast<__vector signed int>(vec_perm(ix, ix, perm))
 732     };
 733 #else
 734     return {
 735                vec_cts(a.simdInternal_, 0)
 736     };
 737 #endif
 738 }
 739
 740 static inline SimdDInt32 gmx_simdcall
 741 cvtR2I(SimdDouble a)
 742 {
 743     return cvttR2I(round(a));
 744 }
 745
 746 static inline SimdDouble gmx_simdcall
 747 cvtI2R(SimdDInt32 a)
 748 {
 749 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 750 // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
 751     const __vector unsigned char perm = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
 752     __vector double              x;
 753
 754     a.simdInternal_ = vec_perm(a.simdInternal_, a.simdInternal_, perm);
 755     __asm__ ("xvcvsxwdp %x0,%x1" : "=wd" (x) : "wa" (a.simdInternal_));
 756
 757     return {
 758                x
 759     };
 760 #else
 761     return {
 762                vec_ctd(a.simdInternal_, 0)
 763     };
 764 #endif
 765 }
 766
 767 static inline SimdDIBool gmx_simdcall
 768 cvtB2IB(SimdDBool a)
 769 {
 770     return {
 771                reinterpret_cast<__vector vsxBool int>(a.simdInternal_)
 772     };
 773 }
 774
 775 static inline SimdDBool gmx_simdcall
 776 cvtIB2B(SimdDIBool a)
 777 {
 778     return {
 779                reinterpret_cast<__vector vsxBool long long>(a.simdInternal_)
 780     };
 781 }
 782
 783 static inline void gmx_simdcall
 784 cvtF2DD(SimdFloat f, SimdDouble *d0, SimdDouble *d1)
 785 {
 786     __vector float fA, fB;
 787     fA  = vec_mergeh(f.simdInternal_, f.simdInternal_); /* 0011 */
 788     fB  = vec_mergel(f.simdInternal_, f.simdInternal_); /* 2233 */
 789 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 790     // gcc-4.9 is missing double-to-float/float-to-double conversions.
 791     __asm__ ("xvcvspdp %x0,%x1" : "=wd" (d0->simdInternal_) : "wf" (fA));
 792     __asm__ ("xvcvspdp %x0,%x1" : "=wd" (d1->simdInternal_) : "wf" (fB));
 793 #else
 794     d0->simdInternal_ = vec_cvf(fA);    /* 01 */
 795     d1->simdInternal_ = vec_cvf(fB);    /* 23 */
 796 #endif
 797 }
 798
 799 static inline SimdFloat gmx_simdcall
 800 cvtDD2F(SimdDouble d0, SimdDouble d1)
 801 {
 802     __vector float fA, fB, fC, fD, fE;
 803 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 804     // gcc-4.9 is missing double-to-float/float-to-double conversions.
 805     __asm__ ("xvcvdpsp %x0,%x1" : "=wf" (fA) : "wd" (d0.simdInternal_));
 806     __asm__ ("xvcvdpsp %x0,%x1" : "=wf" (fB) : "wd" (d1.simdInternal_));
 807 #else
 808     fA = vec_cvf(d0.simdInternal_); /* 0x1x */
 809     fB = vec_cvf(d1.simdInternal_); /* 2x3x */
 810 #endif
 811     fC = vec_mergeh(fA, fB);        /* 02xx */
 812     fD = vec_mergel(fA, fB);        /* 13xx */
 813     fE = vec_mergeh(fC, fD);        /* 0123 */
 814     return {
 815                fE
 816     };
 817 }
 818
 819 static inline SimdDouble gmx_simdcall
 820 copysign(SimdDouble x, SimdDouble y)
 821 {
 822 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 823     __vector double res;
 824     __asm__ ("xvcpsgndp %x0,%x1,%x2" : "=wd" (res) : "wd" (y.simdInternal_), "wd" (x.simdInternal_));
 825     return {
 826                res
 827     };
 828 #else
 829     return {
 830                vec_cpsgn(y.simdInternal_, x.simdInternal_)
 831     };
 832 #endif
 833 }
 834
 835 }      // namespace gmx
 836
 837 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H