/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
#define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H

#include <cstdint>

#include "gromacs/utility/basedefinitions.h"

#include "impl_ibm_vsx_definitions.h"

namespace gmx
{
class SimdDouble
{
    public:
        SimdDouble() {}
        // gcc-4.9 does not recognize that we use the parameter
        SimdDouble(double gmx_unused d) : simdInternal_(vec_splats(d)) {}
        // Internal utility constructor to simplify return statements
        SimdDouble(__vector double simd) : simdInternal_(simd) {}

        __vector double simdInternal_;
};

class SimdDInt32
{
    public:
        SimdDInt32() {}
        // gcc-4.9 does not recognize that we use the parameter
        SimdDInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
        // Internal utility constructor to simplify return statements
        SimdDInt32(__vector signed int simd) : simdInternal_(simd) {}

        __vector signed int simdInternal_;
};

class SimdDBool
{
    public:
        SimdDBool() {}
        SimdDBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool long long>(vec_splats( b ? 0xFFFFFFFFFFFFFFFFULL : 0))) {}
        // Internal utility constructor to simplify return statements
        SimdDBool(__vector vsxBool long long simd) : simdInternal_(simd) {}

        __vector vsxBool long long simdInternal_;
};

class SimdDIBool
{
    public:
        SimdDIBool() {}
        SimdDIBool(bool b) : simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats( b ? 0xFFFFFFFF : 0))) {}
        // Internal utility constructor to simplify return statements
        SimdDIBool(__vector vsxBool int simd) : simdInternal_(simd) {}

        __vector vsxBool int simdInternal_;
};
// The VSX load & store operations are a bit of a mess. The interface is different
// for xlc version 12, xlc version 13, and gcc. Long-term, IBM recommends
// simply using pointer dereferencing both for aligned and unaligned loads.
// That's nice, but unfortunately xlc still bugs out when the pointer is
// not aligned. Sticking to vec_xl/vec_xst isn't a solution either, since
// that appears to be buggy for some _aligned_ loads :-)
//
// For now, we use pointer dereferencing for all aligned load/stores, and
// for unaligned ones with gcc. On xlc we use vec_xlw4/vec_xstw4 for
// unaligned memory operations. The latest docs recommend using the overloaded
// vec_xl/vec_xst, but that is not supported on xlc version 12. We'll
// revisit things once xlc is a bit more stable - for now you probably want
// to stick to gcc...
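// A minimal usage sketch of the load/store functions defined below (hypothetical
// caller code, not part of this header):
//
//     alignas(16) double abuf[2] = { 1.0, 2.0 };
//     SimdDouble         v       = simdLoad(abuf); // aligned load: plain pointer dereference
//     store(abuf, v);                              // aligned store
//     double ubuf[3];
//     storeU(ubuf + 1, v);                         // unaligned store: vec_xstw4 on xlc, dereference on gcc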
static inline SimdDouble gmx_simdcall
simdLoad(const double *m)
{
    return { *reinterpret_cast<const __vector double *>(m) };
}

static inline void gmx_simdcall
store(double *m, SimdDouble a)
{
    *reinterpret_cast<__vector double *>(m) = a.simdInternal_;
}
static inline SimdDouble gmx_simdcall
simdLoadU(const double *m)
{
#if defined(__ibmxl__) || defined(__xlC__)
    return { vec_xlw4(0, const_cast<double *>(m)) };
#else
    return { *reinterpret_cast<const __vector double *>(m) };
#endif
}

static inline void gmx_simdcall
storeU(double *m, SimdDouble a)
{
#if defined(__ibmxl__) || defined(__xlC__)
    vec_xstw4(a.simdInternal_, 0, m);
#else
    *reinterpret_cast<__vector double *>(m) = a.simdInternal_;
#endif
}
static inline SimdDouble gmx_simdcall
setZeroD()
{
    return { vec_splats(0.0) };
}
static inline SimdDInt32 gmx_simdcall
simdLoadDI(const std::int32_t * m)
{
    __vector signed int          t0, t1;
    const __vector unsigned char perm = { 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19 };
    t0 = vec_splats(m[0]);
    t1 = vec_splats(m[1]);
    return { vec_perm(t0, t1, perm) };
}
// gcc-4.9 does not understand that arguments to vec_extract() are used
static inline void gmx_simdcall
store(std::int32_t * m, SimdDInt32 gmx_unused x)
{
    m[0] = vec_extract(x.simdInternal_, 0);
    m[1] = vec_extract(x.simdInternal_, 2);
}
static inline SimdDInt32 gmx_simdcall
simdLoadUDI(const std::int32_t *m)
{
    return simdLoadDI(m);
}

static inline void gmx_simdcall
storeU(std::int32_t * m, SimdDInt32 a)
{
    store(m, a);
}
static inline SimdDInt32 gmx_simdcall
setZeroDI()
{
    return { vec_splats(static_cast<int>(0)) };
}
// gcc-4.9 does not detect that vec_extract() uses its argument
template <int index>
static inline std::int32_t gmx_simdcall
extract(SimdDInt32 gmx_unused a)
{
    return vec_extract(a.simdInternal_, 2*index);
}
static inline SimdDouble gmx_simdcall
operator&(SimdDouble a, SimdDouble b)
{
    return { vec_and(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
andNot(SimdDouble a, SimdDouble b)
{
    return { vec_andc(b.simdInternal_, a.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
operator|(SimdDouble a, SimdDouble b)
{
    return { vec_or(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
operator^(SimdDouble a, SimdDouble b)
{
    return { vec_xor(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
operator+(SimdDouble a, SimdDouble b)
{
    return { vec_add(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
operator-(SimdDouble a, SimdDouble b)
{
    return { vec_sub(a.simdInternal_, b.simdInternal_) };
}
static inline SimdDouble gmx_simdcall
operator-(SimdDouble x)
{
    return { -x.simdInternal_ };
}
static inline SimdDouble gmx_simdcall
operator*(SimdDouble a, SimdDouble b)
{
    return { vec_mul(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
fma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
fms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
fnma(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
fnms(SimdDouble a, SimdDouble b, SimdDouble c)
{
    return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
}
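// Note on the semantics of the fused operations above and the VSX intrinsics they map to:
//   fma(a,b,c)  =  a*b + c    (vec_madd)
//   fms(a,b,c)  =  a*b - c    (vec_msub)
//   fnma(a,b,c) = -a*b + c    (vec_nmsub)
//   fnms(a,b,c) = -a*b - c    (vec_nmadd)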
static inline SimdDouble gmx_simdcall
rsqrt(SimdDouble x)
{
    return { vec_rsqrte(x.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
rcp(SimdDouble x)
{
    return { vec_re(x.simdInternal_) };
}
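// Note: vec_rsqrte() and vec_re() only produce hardware estimates of 1/sqrt(x)
// and 1/x; callers (e.g. the SIMD math layer) are expected to refine the result
// with Newton-Raphson iterations when full precision is needed.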
static inline SimdDouble gmx_simdcall
maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
{
    return { vec_add(a.simdInternal_, vec_and(b.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))) };
}

static inline SimdDouble gmx_simdcall
maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
{
    SimdDouble prod = a * b;

    return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall
maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
{
    SimdDouble prod = fma(a, b, c);

    return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}
static inline SimdDouble gmx_simdcall
maskzRsqrt(SimdDouble x, SimdDBool m)
{
    // Use 1.0 for masked-out entries so the estimate does not trigger FP exceptions
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);

    return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall
maskzRcp(SimdDouble x, SimdDBool m)
{
    // Use 1.0 for masked-out entries so the estimate does not trigger FP exceptions
    x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);

    return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
}
static inline SimdDouble gmx_simdcall
abs(SimdDouble x)
{
    return { vec_abs(x.simdInternal_) };
}
static inline SimdDouble gmx_simdcall
max(SimdDouble a, SimdDouble b)
{
    return { vec_max(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDouble gmx_simdcall
min(SimdDouble a, SimdDouble b)
{
    return { vec_min(a.simdInternal_, b.simdInternal_) };
}
static inline SimdDouble gmx_simdcall
round(SimdDouble x)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc up to at least version 4.9 does not have vec_round() in double precision - use inline asm
    __vector double res;
    __asm__ ("xvrdpi %x0,%x1" : "=wd" (res) : "wd" (x.simdInternal_));
    return { res };
#else
    return { vec_round(x.simdInternal_) };
#endif
}

static inline SimdDouble gmx_simdcall
trunc(SimdDouble x)
{
    return { vec_trunc(x.simdInternal_) };
}
static inline SimdDouble
frexp(SimdDouble value, SimdDInt32 * exponent)
{
    const __vector double     exponentMask = reinterpret_cast<__vector double>(vec_splats(0x7FF0000000000000ULL));
    const __vector signed int exponentBias = vec_splats(1022);
    const __vector double     half         = vec_splats(0.5);
    __vector signed int       iExponent;

    iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
    // The exponent is in the upper half of each double (corresponding to elements 1 and 3).
    // First shift it down by 52-32=20 bits, and then permute to swap element 0 with 1 and element 2 with 3.
    // For big endian the halves are in the opposite order, so there we simply skip the swap.
    iExponent = vec_sr(iExponent, vec_splats(20U));
#ifndef __BIG_ENDIAN__
    const __vector unsigned char perm = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
    iExponent = vec_perm(iExponent, iExponent, perm);
#endif
    iExponent               = vec_sub(iExponent, exponentBias);
    exponent->simdInternal_ = iExponent;

    return { vec_or(vec_andc(value.simdInternal_, exponentMask), half) };
}
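// Worked example: for value = 8.0 the biased exponent field is 1026, so frexp()
// stores 1026 - 1022 = 4 in *exponent and returns a fraction of 0.5 (the mantissa
// of 8.0 with its exponent field replaced by that of 0.5), and 0.5 * 2^4 == 8.0
// as required.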
static inline SimdDouble
ldexp(SimdDouble value, SimdDInt32 exponent)
{
    const __vector signed int    exponentBias = vec_splats(1023);
    __vector signed int          iExponent;
#ifdef __BIG_ENDIAN__
    const __vector unsigned char perm = {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 16, 17, 18, 19};
#else
    const __vector unsigned char perm = {16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11};
#endif

    iExponent = vec_add(exponent.simdInternal_, exponentBias);
    // The exponent is now present in pairs of integers: 0011.
    // Elements 0/2 already correspond to the upper half of each double,
    // so we only need to shift by another 52-32=20 bits.
    // The remaining elements are set to zero.
    iExponent = vec_sl(iExponent, vec_splats(20U));
    iExponent = vec_perm(iExponent, vec_splats(0), perm);

    return { vec_mul(value.simdInternal_, reinterpret_cast<__vector double>(iExponent)) };
}
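// Worked example: ldexp(0.75, 3) adds the bias (3 + 1023 = 1026), shifts that value
// into the exponent field of a double (yielding the constant 2^3 = 8.0), and
// multiplies: 0.75 * 8.0 = 6.0. No overflow or underflow checking is performed.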
static inline double gmx_simdcall
reduce(SimdDouble x)
{
    const __vector unsigned char perm = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };

#if defined(__ibmxl__) || defined(__xlC__)
    /* old xlc version 12 does not understand vec_perm() with double arguments */
    x.simdInternal_ = vec_add(x.simdInternal_,
                              reinterpret_cast<__vector double>(vec_perm(reinterpret_cast<__vector signed int>(x.simdInternal_),
                                                                         reinterpret_cast<__vector signed int>(x.simdInternal_), perm)));
#else
    x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm));
#endif
    // The permutation swaps the two 64-bit halves, so after the add both elements hold the sum
    return vec_extract(x.simdInternal_, 0);
}
static inline SimdDBool gmx_simdcall
operator==(SimdDouble a, SimdDouble b)
{
    return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall
operator!=(SimdDouble a, SimdDouble b)
{
    return { reinterpret_cast<__vector vsxBool long long>(vec_or(reinterpret_cast<__vector signed int>(vec_cmpgt(a.simdInternal_, b.simdInternal_)),
                                                                 reinterpret_cast<__vector signed int>(vec_cmplt(a.simdInternal_, b.simdInternal_)))) };
}

static inline SimdDBool gmx_simdcall
operator<(SimdDouble a, SimdDouble b)
{
    return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDBool gmx_simdcall
operator<=(SimdDouble a, SimdDouble b)
{
    return { vec_cmple(a.simdInternal_, b.simdInternal_) };
}
520 testBits(SimdDouble a)
522 #ifdef __POWER8_VECTOR__
524 vec_cmpgt(reinterpret_cast<__vector unsigned long long>(a.simdInternal_), vec_splats(0ULL))
528 reinterpret_cast<__vector vsxBool long long>(vec_nor(reinterpret_cast<__vector signed int>(vec_cmpeq(a.simdInternal_, vec_splats(0.0))), vec_splats(0)))
static inline SimdDBool gmx_simdcall
operator&&(SimdDBool a, SimdDBool b)
{
    return { reinterpret_cast<__vector vsxBool long long>(vec_and(reinterpret_cast<__vector signed int>(a.simdInternal_), reinterpret_cast<__vector signed int>(b.simdInternal_))) };
}

static inline SimdDBool gmx_simdcall
operator||(SimdDBool a, SimdDBool b)
{
    return { reinterpret_cast<__vector vsxBool long long>(vec_or(reinterpret_cast<__vector signed int>(a.simdInternal_), reinterpret_cast<__vector signed int>(b.simdInternal_))) };
}
static inline bool gmx_simdcall
anyTrue(SimdDBool a)
{
    return vec_any_ne(reinterpret_cast<__vector vsxBool int>(a.simdInternal_), reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
}
static inline SimdDouble gmx_simdcall
selectByMask(SimdDouble a, SimdDBool m)
{
    return { vec_and(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall
selectByNotMask(SimdDouble a, SimdDBool m)
{
    return { vec_andc(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
}

static inline SimdDouble gmx_simdcall
blend(SimdDouble a, SimdDouble b, SimdDBool sel)
{
    return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
}
static inline SimdDInt32 gmx_simdcall
operator<<(SimdDInt32 a, int n)
{
    return { vec_sl(a.simdInternal_, vec_splats(static_cast<unsigned int>(n))) };
}

static inline SimdDInt32 gmx_simdcall
operator>>(SimdDInt32 a, int n)
{
    return { vec_sr(a.simdInternal_, vec_splats(static_cast<unsigned int>(n))) };
}

static inline SimdDInt32 gmx_simdcall
operator&(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_and(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall
andNot(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_andc(b.simdInternal_, a.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall
operator|(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_or(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall
operator^(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_xor(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall
operator+(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_add(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall
operator-(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_sub(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDInt32 gmx_simdcall
operator*(SimdDInt32 a, SimdDInt32 b)
{
    return { a.simdInternal_ * b.simdInternal_ };
}

static inline SimdDIBool gmx_simdcall
operator==(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
}
static inline SimdDIBool gmx_simdcall
testBits(SimdDInt32 a)
{
    return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
}

static inline SimdDIBool gmx_simdcall
operator<(SimdDInt32 a, SimdDInt32 b)
{
    return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall
operator&&(SimdDIBool a, SimdDIBool b)
{
    return { vec_and(a.simdInternal_, b.simdInternal_) };
}

static inline SimdDIBool gmx_simdcall
operator||(SimdDIBool a, SimdDIBool b)
{
    return { vec_or(a.simdInternal_, b.simdInternal_) };
}
static inline bool gmx_simdcall
anyTrue(SimdDIBool a)
{
    return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
}
static inline SimdDInt32 gmx_simdcall
selectByMask(SimdDInt32 a, SimdDIBool m)
{
    return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall
selectByNotMask(SimdDInt32 a, SimdDIBool m)
{
    return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
}

static inline SimdDInt32 gmx_simdcall
blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
{
    return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
}
static inline SimdDInt32 gmx_simdcall
cvttR2I(SimdDouble a)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
    const __vector unsigned char perm = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
    __vector double              ix;

    __asm__ ("xvcvdpsxws %x0,%x1" : "=wa" (ix) : "wd" (a.simdInternal_));

    return { reinterpret_cast<__vector signed int>(vec_perm(ix, ix, perm)) };
#else
    return { vec_cts(a.simdInternal_, 0) };
#endif
}
static inline SimdDInt32 gmx_simdcall
cvtR2I(SimdDouble a)
{
    return cvttR2I(round(a));
}
static inline SimdDouble gmx_simdcall
cvtI2R(SimdDInt32 a)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
    const __vector unsigned char perm = {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11};
    __vector double              x;

    a.simdInternal_ = vec_perm(a.simdInternal_, a.simdInternal_, perm);
    __asm__ ("xvcvsxwdp %x0,%x1" : "=wd" (x) : "wa" (a.simdInternal_));

    return { x };
#else
    return { vec_ctd(a.simdInternal_, 0) };
#endif
}
static inline SimdDIBool gmx_simdcall
cvtB2IB(SimdDBool a)
{
    return { reinterpret_cast<__vector vsxBool int>(a.simdInternal_) };
}

static inline SimdDBool gmx_simdcall
cvtIB2B(SimdDIBool a)
{
    return { reinterpret_cast<__vector vsxBool long long>(a.simdInternal_) };
}
static inline void gmx_simdcall
cvtF2DD(SimdFloat f, SimdDouble *d0, SimdDouble *d1)
{
    __vector float fA, fB;
    fA = vec_mergeh(f.simdInternal_, f.simdInternal_); /* 0011 */
    fB = vec_mergel(f.simdInternal_, f.simdInternal_); /* 2233 */

#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc-4.9 is missing double-to-float/float-to-double conversions.
    __asm__ ("xvcvspdp %x0,%x1" : "=wd" (d0->simdInternal_) : "wf" (fA));
    __asm__ ("xvcvspdp %x0,%x1" : "=wd" (d1->simdInternal_) : "wf" (fB));
#else
    d0->simdInternal_ = vec_cvf(fA); /* 01 */
    d1->simdInternal_ = vec_cvf(fB); /* 23 */
#endif
}
static inline SimdFloat gmx_simdcall
cvtDD2F(SimdDouble d0, SimdDouble d1)
{
    __vector float fA, fB, fC, fD, fE;

#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    // gcc-4.9 is missing double-to-float/float-to-double conversions.
    __asm__ ("xvcvdpsp %x0,%x1" : "=wf" (fA) : "wd" (d0.simdInternal_));
    __asm__ ("xvcvdpsp %x0,%x1" : "=wf" (fB) : "wd" (d1.simdInternal_));
#else
    fA = vec_cvf(d0.simdInternal_); /* 0x1x */
    fB = vec_cvf(d1.simdInternal_); /* 2x3x */
#endif
    fC = vec_mergeh(fA, fB); /* 02xx */
    fD = vec_mergel(fA, fB); /* 13xx */
    fE = vec_mergeh(fC, fD); /* 0123 */
    return { fE };
}
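// Data-flow sketch for the two conversions above: cvtF2DD takes one float vector
// {f0,f1,f2,f3} and widens it into two double vectors {f0,f1} and {f2,f3};
// cvtDD2F is the inverse, narrowing {d0,d1} and {d2,d3} into a single float
// vector {d0,d1,d2,d3} (with the usual loss of precision).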
static inline SimdDouble gmx_simdcall
copysign(SimdDouble x, SimdDouble y)
{
#if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
    __vector double res;
    __asm__ ("xvcpsgndp %x0,%x1,%x2" : "=wd" (res) : "wd" (y.simdInternal_), "wd" (x.simdInternal_));
    return { res };
#else
    return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
#endif
}

}      // namespace gmx

#endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H