src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H
  38
  39 #include "config.h"
  40
  41 #include "gromacs/math/utilities.h"
  42 #include "gromacs/utility/basedefinitions.h"
  43
  44 #include "impl_ibm_vsx_definitions.h"
  45
  46 namespace gmx
  47 {
  48
  49 class SimdDouble
  50 {
  51 public:
  52     SimdDouble() {}
  53
  54     // gcc-4.9 does not recognize that we use the parameter
  55     SimdDouble(double gmx_unused d) : simdInternal_(vec_splats(d)) {}
  56
  57     // Internal utility constructor to simplify return statements
  58     SimdDouble(__vector double simd) : simdInternal_(simd) {}
  59
  60     __vector double simdInternal_;
  61 };
  62
  63 class SimdDInt32
  64 {
  65 public:
  66     SimdDInt32() {}
  67
  68     // gcc-4.9 does not recognize that we use the parameter
  69     SimdDInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
  70
  71     // Internal utility constructor to simplify return statements
  72     SimdDInt32(__vector signed int simd) : simdInternal_(simd) {}
  73
  74     __vector signed int simdInternal_;
  75 };
  76
  77 class SimdDBool
  78 {
  79 public:
  80     SimdDBool() {}
  81
  82     SimdDBool(bool b) :
  83         simdInternal_(reinterpret_cast<__vector vsxBool long long>(vec_splats(b ? 0xFFFFFFFFFFFFFFFFULL : 0)))
  84     {
  85     }
  86
  87     // Internal utility constructor to simplify return statements
  88     SimdDBool(__vector vsxBool long long simd) : simdInternal_(simd) {}
  89
  90     __vector vsxBool long long simdInternal_;
  91 };
  92
  93 class SimdDIBool
  94 {
  95 public:
  96     SimdDIBool() {}
  97
  98     SimdDIBool(bool b) :
  99         simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
 100     {
 101     }
 102
 103     // Internal utility constructor to simplify return statements
 104     SimdDIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
 105
 106     __vector vsxBool int simdInternal_;
 107 };
 108
 109 // Note that the interfaces we use here have been a mess in xlc;
 110 // currently version 13.1.5 is required.
 111
 112 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
 113 {
 114     return
 115     {
 116 #if defined(__ibmxl__)
 117         vec_ld(0, m)
 118 #else
 119 #    if __GNUC__ < 7
 120         *reinterpret_cast<const __vector double*>(m)
 121 #    else
 122         vec_vsx_ld(0, m)
 123 #    endif
 124 #endif
 125     };
 126 }
 127
 128 static inline void gmx_simdcall store(double* m, SimdDouble a)
 129 {
 130 #if defined(__ibmxl__)
 131     vec_st(a.simdInternal_, 0, m);
 132 #else
 133 #    if __GNUC__ < 7
 134     *reinterpret_cast<__vector double*>(m) = a.simdInternal_;
 135 #    else
 136     vec_vsx_st(a.simdInternal_, 0, m);
 137 #    endif
 138 #endif
 139 }
 140
 141 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
 142 {
 143     return
 144     {
 145 #if defined(__ibmxl__)
 146         vec_xl(0, m)
 147 #else
 148 #    if __GNUC__ < 7
 149         *reinterpret_cast<const __vector double*>(m)
 150 #    else
 151         vec_vsx_ld(0, m)
 152 #    endif
 153 #endif
 154     };
 155 }
 156
 157 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
 158 {
 159 #if defined(__ibmxl__)
 160     vec_xst(a.simdInternal_, 0, m);
 161 #else
 162 #    if __GNUC__ < 7
 163     *reinterpret_cast<__vector double*>(m) = a.simdInternal_;
 164 #    else
 165     vec_vsx_st(a.simdInternal_, 0, m);
 166 #    endif
 167 #endif
 168 }
 169
 170 static inline SimdDouble gmx_simdcall setZeroD()
 171 {
 172     return { vec_splats(0.0) };
 173 }
 174
 175 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
 176 {
 177     __vector signed int          t0, t1;
 178     const __vector unsigned char perm = { 0, 1, 2, 3, 0, 1, 2, 3, 16, 17, 18, 19, 16, 17, 18, 19 };
 179     t0                                = vec_splats(m[0]);
 180     t1                                = vec_splats(m[1]);
 181     return { vec_perm(t0, t1, perm) };
 182 }
 183
 184 // gcc-4.9 does not understand that arguments to vec_extract() are used
 185 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 gmx_unused x)
 186 {
 187     m[0] = vec_extract(x.simdInternal_, 0);
 188     m[1] = vec_extract(x.simdInternal_, 2);
 189 }
 190
 191 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
 192 {
 193     return simdLoad(m, SimdDInt32Tag());
 194 }
 195
 196 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
 197 {
 198     return store(m, a);
 199 }
 200
 201 static inline SimdDInt32 gmx_simdcall setZeroDI()
 202 {
 203     return { vec_splats(static_cast<int>(0)) };
 204 }
 205
 206 // gcc-4.9 does not detect that vec_extract() uses its argument
 207 template<int index>
 208 static inline std::int32_t gmx_simdcall extract(SimdDInt32 gmx_unused a)
 209 {
 210     return vec_extract(a.simdInternal_, 2 * index);
 211 }
 212
 213 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
 214 {
 215     return { vec_and(a.simdInternal_, b.simdInternal_) };
 216 }
 217
 218 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
 219 {
 220     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 221 }
 222
 223 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
 224 {
 225     return { vec_or(a.simdInternal_, b.simdInternal_) };
 226 }
 227
 228 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
 229 {
 230     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 231 }
 232
 233 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
 234 {
 235     return { vec_add(a.simdInternal_, b.simdInternal_) };
 236 }
 237
 238 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
 239 {
 240     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 241 }
 242
 243 static inline SimdDouble gmx_simdcall operator-(SimdDouble x)
 244 {
 245     return { -x.simdInternal_ };
 246 }
 247
 248 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
 249 {
 250     return { vec_mul(a.simdInternal_, b.simdInternal_) };
 251 }
 252
 253 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
 254 {
 255     return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 256 }
 257
 258 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
 259 {
 260     return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 261 }
 262
 263 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 264 {
 265     return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 266 }
 267
 268 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 269 {
 270     return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 271 }
 272
 273 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
 274 {
 275     return { vec_rsqrte(x.simdInternal_) };
 276 }
 277
 278 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
 279 {
 280     return { vec_re(x.simdInternal_) };
 281 }
 282
 283 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 284 {
 285     return { vec_add(a.simdInternal_,
 286                      vec_and(b.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_))) };
 287 }
 288
 289 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 290 {
 291     SimdDouble prod = a * b;
 292
 293     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 294 }
 295
 296 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 297 {
 298     SimdDouble prod = fma(a, b, c);
 299
 300     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 301 }
 302
 303 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
 304 {
 305 #ifndef NDEBUG
 306     x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
 307 #endif
 308     return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
 309 }
 310
 311 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
 312 {
 313 #ifndef NDEBUG
 314     x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
 315 #endif
 316     return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector double>(m.simdInternal_)) };
 317 }
 318
 319 static inline SimdDouble gmx_simdcall abs(SimdDouble x)
 320 {
 321     return { vec_abs(x.simdInternal_) };
 322 }
 323
 324 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
 325 {
 326     return { vec_max(a.simdInternal_, b.simdInternal_) };
 327 }
 328
 329 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
 330 {
 331     return { vec_min(a.simdInternal_, b.simdInternal_) };
 332 }
 333
 334 static inline SimdDouble gmx_simdcall round(SimdDouble x)
 335 {
 336 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 337     // gcc up to at least version 4.9 does not have vec_round() in double precision - use inline asm
 338     __vector double res;
 339     __asm__("xvrdpi %x0,%x1" : "=wd"(res) : "wd"(x.simdInternal_));
 340     return { res };
 341 #else
 342     return { vec_round(x.simdInternal_) };
 343 #endif
 344 }
 345
 346 static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
 347 {
 348     return { vec_trunc(x.simdInternal_) };
 349 }
 350
 351 static inline SimdDouble frexp(SimdDouble value, SimdDInt32* exponent)
 352 {
 353     const __vector double exponentMask =
 354             reinterpret_cast<__vector double>(vec_splats(0x7FF0000000000000ULL));
 355     const __vector signed int exponentBias = vec_splats(1022);
 356     const __vector double     half         = vec_splats(0.5);
 357     __vector signed int       iExponent;
 358
 359     iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
 360     // The data is in the upper half of each double (corresponding to elements 1 and 3).
 361     // First shift 52-32=20bits, and then permute to swap element 0 with 1 and element 2 with 3
 362     // For big endian they are in opposite order, so then we simply skip the swap.
 363     iExponent = vec_sr(iExponent, vec_splats(20U));
 364 #ifndef __BIG_ENDIAN__
 365     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 366     iExponent                         = vec_perm(iExponent, iExponent, perm);
 367 #endif
 368     iExponent               = vec_sub(iExponent, exponentBias);
 369     exponent->simdInternal_ = iExponent;
 370
 371     return { vec_or(vec_andc(value.simdInternal_, exponentMask), half) };
 372 }
 373
 374 template<MathOptimization opt = MathOptimization::Safe>
 375 static inline SimdDouble ldexp(SimdDouble value, SimdDInt32 exponent)
 376 {
 377     const __vector signed int exponentBias = vec_splats(1023);
 378     __vector signed int       iExponent;
 379 #ifdef __BIG_ENDIAN__
 380     const __vector unsigned char perm = { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 16, 17, 18, 19 };
 381 #else
 382     const __vector unsigned char perm = { 16, 17, 18, 19, 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11 };
 383 #endif
 384
 385     iExponent = vec_add(exponent.simdInternal_, exponentBias);
 386
 387     if (opt == MathOptimization::Safe)
 388     {
 389         // Make sure biased argument is not negative
 390         iExponent = vec_max(iExponent, vec_splat_s32(0));
 391     }
 392
 393     // exponent is now present in pairs of integers; 0011.
 394     // Elements 0/2 already correspond to the upper half of each double,
 395     // so we only need to shift by another 52-32=20 bits.
 396     // The remaining elements are set to zero.
 397     iExponent = vec_sl(iExponent, vec_splats(20U));
 398     iExponent = vec_perm(iExponent, vec_splats(0), perm);
 399
 400     return { vec_mul(value.simdInternal_, reinterpret_cast<__vector double>(iExponent)) };
 401 }
 402
 403 static inline double gmx_simdcall reduce(SimdDouble x)
 404 {
 405     const __vector unsigned char perm = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
 406 #ifdef __xlC__
 407     /* old xlc version 12 does not understand vec_perm() with double arguments */
 408     x.simdInternal_ = vec_add(
 409             x.simdInternal_, reinterpret_cast<__vector double>(vec_perm(
 410                                      reinterpret_cast<__vector signed int>(x.simdInternal_),
 411                                      reinterpret_cast<__vector signed int>(x.simdInternal_), perm)));
 412 #else
 413     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm));
 414 #endif
 415     return vec_extract(x.simdInternal_, 0);
 416 }
 417
 418 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
 419 {
 420     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 421 }
 422
 423 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
 424 {
 425     return { reinterpret_cast<__vector vsxBool long long>(vec_or(
 426             reinterpret_cast<__vector signed int>(vec_cmpgt(a.simdInternal_, b.simdInternal_)),
 427             reinterpret_cast<__vector signed int>(vec_cmplt(a.simdInternal_, b.simdInternal_)))) };
 428 }
 429
 430 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
 431 {
 432     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 433 }
 434
 435 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
 436 {
 437     return { vec_cmple(a.simdInternal_, b.simdInternal_) };
 438 }
 439
 440 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
 441 {
 442 #ifdef __POWER8_VECTOR__
 443     // Power8 VSX has proper support for operations on long long integers
 444     return { vec_cmpgt(reinterpret_cast<__vector unsigned long long>(a.simdInternal_), vec_splats(0ULL)) };
 445 #else
 446     // No support for long long operations.
 447     // Start with comparing 32-bit subfields bitwise by casting to integers
 448     __vector vsxBool int tmp =
 449             vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U));
 450
 451     // Shuffle low/high 32-bit fields of tmp into tmp2
 452     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 453     __vector vsxBool int tmp2 = vec_perm(tmp, tmp, perm);
 454
 455     // Return the or:d parts of tmp & tmp2
 456     return { reinterpret_cast<__vector vsxBool long long>(vec_or(tmp, tmp2)) };
 457 #endif
 458 }
 459
 460 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
 461 {
 462     return { reinterpret_cast<__vector vsxBool long long>(
 463             vec_and(reinterpret_cast<__vector signed int>(a.simdInternal_),
 464                     reinterpret_cast<__vector signed int>(b.simdInternal_))) };
 465 }
 466
 467 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
 468 {
 469     return { reinterpret_cast<__vector vsxBool long long>(
 470             vec_or(reinterpret_cast<__vector signed int>(a.simdInternal_),
 471                    reinterpret_cast<__vector signed int>(b.simdInternal_))) };
 472 }
 473
 474 static inline bool gmx_simdcall anyTrue(SimdDBool a)
 475 {
 476     return vec_any_ne(reinterpret_cast<__vector vsxBool int>(a.simdInternal_),
 477                       reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 478 }
 479
 480 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
 481 {
 482     return { vec_and(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 483 }
 484
 485 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
 486 {
 487     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector double>(m.simdInternal_)) };
 488 }
 489
 490 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
 491 {
 492     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 493 }
 494
 495 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
 496 {
 497     return { vec_and(a.simdInternal_, b.simdInternal_) };
 498 }
 499
 500 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
 501 {
 502     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 503 }
 504
 505 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
 506 {
 507     return { vec_or(a.simdInternal_, b.simdInternal_) };
 508 }
 509
 510 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
 511 {
 512     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 513 }
 514
 515 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
 516 {
 517     return { vec_add(a.simdInternal_, b.simdInternal_) };
 518 }
 519
 520 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
 521 {
 522     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 523 }
 524
 525 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
 526 {
 527     return { a.simdInternal_ * b.simdInternal_ };
 528 }
 529
 530 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
 531 {
 532     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 533 }
 534
 535 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
 536 {
 537     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
 538 }
 539
 540 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
 541 {
 542     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 543 }
 544
 545 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
 546 {
 547     return { vec_and(a.simdInternal_, b.simdInternal_) };
 548 }
 549
 550 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
 551 {
 552     return { vec_or(a.simdInternal_, b.simdInternal_) };
 553 }
 554
 555 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
 556 {
 557     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 558 }
 559
 560 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
 561 {
 562     return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 563 }
 564
 565 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
 566 {
 567     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 568 }
 569
 570 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
 571 {
 572     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 573 }
 574
 575 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
 576 {
 577 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 578     // gcc up to at least version 6.1 is missing intrinsics for converting double to/from int - use inline asm
 579     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 580     __vector double              ix;
 581
 582     __asm__("xvcvdpsxws %x0,%x1" : "=wa"(ix) : "wd"(a.simdInternal_));
 583
 584     return { reinterpret_cast<__vector signed int>(vec_perm(ix, ix, perm)) };
 585 #else
 586     return { vec_cts(a.simdInternal_, 0) };
 587 #endif
 588 }
 589
 590 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
 591 {
 592     return cvttR2I(round(a));
 593 }
 594
 595 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
 596 {
 597 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 598     // gcc up to at least version 4.9 is missing intrinsics for converting double to/from int - use inline asm
 599     __vector double x;
 600 #    ifndef __BIG_ENDIAN__
 601     const __vector unsigned char perm = { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
 602     a.simdInternal_                   = vec_perm(a.simdInternal_, a.simdInternal_, perm);
 603 #    endif
 604
 605     __asm__("xvcvsxwdp %x0,%x1" : "=wd"(x) : "wa"(a.simdInternal_));
 606
 607     return { x };
 608 #else
 609     return { vec_ctd(a.simdInternal_, 0) };
 610 #endif
 611 }
 612
 613 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
 614 {
 615     return { reinterpret_cast<__vector vsxBool int>(a.simdInternal_) };
 616 }
 617
 618 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
 619 {
 620     return { reinterpret_cast<__vector vsxBool long long>(a.simdInternal_) };
 621 }
 622
 623 static inline void gmx_simdcall cvtF2DD(SimdFloat f, SimdDouble* d0, SimdDouble* d1)
 624 {
 625     __vector float fA, fB;
 626     fA = vec_mergeh(f.simdInternal_, f.simdInternal_); /* 0011 */
 627     fB = vec_mergel(f.simdInternal_, f.simdInternal_); /* 2233 */
 628 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 629     // gcc-4.9 is missing double-to-float/float-to-double conversions.
 630     __asm__("xvcvspdp %x0,%x1" : "=wd"(d0->simdInternal_) : "wf"(fA));
 631     __asm__("xvcvspdp %x0,%x1" : "=wd"(d1->simdInternal_) : "wf"(fB));
 632 #else
 633     d0->simdInternal_ = vec_cvf(fA); /* 01 */
 634     d1->simdInternal_ = vec_cvf(fB); /* 23 */
 635 #endif
 636 }
 637
 638 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble d0, SimdDouble d1)
 639 {
 640     __vector float fA, fB, fC, fD, fE;
 641 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 642     // gcc-4.9 is missing double-to-float/float-to-double conversions.
 643     __asm__("xvcvdpsp %x0,%x1" : "=wf"(fA) : "wd"(d0.simdInternal_));
 644     __asm__("xvcvdpsp %x0,%x1" : "=wf"(fB) : "wd"(d1.simdInternal_));
 645 #else
 646     fA = vec_cvf(d0.simdInternal_);  /* 0x1x */
 647     fB = vec_cvf(d1.simdInternal_);  /* 2x3x */
 648 #endif
 649     fC = vec_mergeh(fA, fB); /* 02xx */
 650     fD = vec_mergel(fA, fB); /* 13xx */
 651     fE = vec_mergeh(fC, fD); /* 0123 */
 652     return { fE };
 653 }
 654
 655 static inline SimdDouble gmx_simdcall copysign(SimdDouble x, SimdDouble y)
 656 {
 657 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 658     __vector double res;
 659     __asm__("xvcpsgndp %x0,%x1,%x2" : "=wd"(res) : "wd"(y.simdInternal_), "wd"(x.simdInternal_));
 660     return { res };
 661 #else
 662     return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
 663 #endif
 664 }
 665
 666 } // namespace gmx
 667
 668 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_DOUBLE_H