src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H
  38
  39 #include "config.h"
  40
  41 #include "gromacs/math/utilities.h"
  42 #include "gromacs/utility/basedefinitions.h"
  43
  44 #include "impl_ibm_vsx_definitions.h"
  45
  46 namespace gmx
  47 {
  48
  49 class SimdFloat
  50 {
  51 public:
  52     SimdFloat() {}
  53
  54     // gcc-4.9 does not recognize that we use the parameter
  55     SimdFloat(float gmx_unused f) : simdInternal_(vec_splats(f)) {}
  56
  57     // Internal utility constructor to simplify return statements
  58     SimdFloat(__vector float simd) : simdInternal_(simd) {}
  59
  60     __vector float simdInternal_;
  61 };
  62
  63 class SimdFInt32
  64 {
  65 public:
  66     SimdFInt32() {}
  67
  68     // gcc-4.9 does not recognize that we use the parameter
  69     SimdFInt32(std::int32_t gmx_unused i) : simdInternal_(vec_splats(i)) {}
  70
  71     // Internal utility constructor to simplify return statements
  72     SimdFInt32(__vector signed int simd) : simdInternal_(simd) {}
  73
  74     __vector signed int simdInternal_;
  75 };
  76
  77 class SimdFBool
  78 {
  79 public:
  80     SimdFBool() {}
  81
  82     SimdFBool(bool b) :
  83         simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
  84     {
  85     }
  86
  87     // Internal utility constructor to simplify return statements
  88     SimdFBool(__vector vsxBool int simd) : simdInternal_(simd) {}
  89
  90     __vector vsxBool int simdInternal_;
  91 };
  92
  93 class SimdFIBool
  94 {
  95 public:
  96     SimdFIBool() {}
  97
  98     SimdFIBool(bool b) :
  99         simdInternal_(reinterpret_cast<__vector vsxBool int>(vec_splats(b ? 0xFFFFFFFF : 0)))
 100     {
 101     }
 102
 103     // Internal utility constructor to simplify return statements
 104     SimdFIBool(__vector vsxBool int simd) : simdInternal_(simd) {}
 105
 106     __vector vsxBool int simdInternal_;
 107 };
 108
 109 // Note that the interfaces we use here have been a mess in xlc;
 110 // currently version 13.1.5 is required.
 111
 112 static inline SimdFloat gmx_simdcall simdLoad(const float* m, SimdFloatTag = {})
 113 {
 114     return { *reinterpret_cast<const __vector float*>(m) };
 115 }
 116
 117 static inline void gmx_simdcall store(float* m, SimdFloat a)
 118 {
 119     *reinterpret_cast<__vector float*>(m) = a.simdInternal_;
 120 }
 121
 122 static inline SimdFloat gmx_simdcall simdLoadU(const float* m, SimdFloatTag = {})
 123 {
 124     return
 125     {
 126 #if __GNUC__ < 7
 127         *reinterpret_cast<const __vector float*>(m)
 128 #else
 129         vec_xl(0, m)
 130 #endif
 131     };
 132 }
 133
 134 static inline void gmx_simdcall storeU(float* m, SimdFloat a)
 135 {
 136 #if __GNUC__ < 7
 137     *reinterpret_cast<__vector float*>(m) = a.simdInternal_;
 138 #else
 139     vec_xst(a.simdInternal_, 0, m);
 140 #endif
 141 }
 142
 143 static inline SimdFloat gmx_simdcall setZeroF()
 144 {
 145     return { vec_splats(0.0F) };
 146 }
 147
 148 static inline SimdFInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdFInt32Tag)
 149 {
 150     return { *reinterpret_cast<const __vector int*>(m) };
 151 }
 152
 153 static inline void gmx_simdcall store(std::int32_t* m, SimdFInt32 a)
 154 {
 155     *reinterpret_cast<__vector int*>(m) = a.simdInternal_;
 156 }
 157
 158 static inline SimdFInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdFInt32Tag)
 159 {
 160     return
 161     {
 162 #if __GNUC__ < 7
 163         *reinterpret_cast<const __vector int*>(m)
 164 #else
 165         vec_xl(0, m)
 166 #endif
 167     };
 168 }
 169
 170 static inline void gmx_simdcall storeU(std::int32_t* m, SimdFInt32 a)
 171 {
 172 #if __GNUC__ < 7
 173     *reinterpret_cast<__vector int*>(m) = a.simdInternal_;
 174 #else
 175     vec_xst(a.simdInternal_, 0, m);
 176 #endif
 177 }
 178
 179 static inline SimdFInt32 gmx_simdcall setZeroFI()
 180 {
 181     return { vec_splats(static_cast<int>(0)) };
 182 }
 183
 184 // gcc-4.9 does not detect that vec_extract() uses its argument
 185 template<int index>
 186 static inline std::int32_t gmx_simdcall extract(SimdFInt32 gmx_unused a)
 187 {
 188     return vec_extract(a.simdInternal_, index);
 189 }
 190
 191 static inline SimdFloat gmx_simdcall operator&(SimdFloat a, SimdFloat b)
 192 {
 193     return { vec_and(a.simdInternal_, b.simdInternal_) };
 194 }
 195
 196 static inline SimdFloat gmx_simdcall andNot(SimdFloat a, SimdFloat b)
 197 {
 198     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 199 }
 200
 201 static inline SimdFloat gmx_simdcall operator|(SimdFloat a, SimdFloat b)
 202 {
 203     return { vec_or(a.simdInternal_, b.simdInternal_) };
 204 }
 205
 206 static inline SimdFloat gmx_simdcall operator^(SimdFloat a, SimdFloat b)
 207 {
 208     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 209 }
 210
 211 static inline SimdFloat gmx_simdcall operator+(SimdFloat a, SimdFloat b)
 212 {
 213     return { vec_add(a.simdInternal_, b.simdInternal_) };
 214 }
 215
 216 static inline SimdFloat gmx_simdcall operator-(SimdFloat a, SimdFloat b)
 217 {
 218     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 219 }
 220
 221 static inline SimdFloat gmx_simdcall operator-(SimdFloat x)
 222 {
 223     return { -x.simdInternal_ };
 224 }
 225
 226 static inline SimdFloat gmx_simdcall operator*(SimdFloat a, SimdFloat b)
 227 {
 228     return { vec_mul(a.simdInternal_, b.simdInternal_) };
 229 }
 230
 231 static inline SimdFloat gmx_simdcall fma(SimdFloat a, SimdFloat b, SimdFloat c)
 232 {
 233     return { vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 234 }
 235
 236 static inline SimdFloat gmx_simdcall fms(SimdFloat a, SimdFloat b, SimdFloat c)
 237 {
 238     return { vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 239 }
 240
 241 static inline SimdFloat gmx_simdcall fnma(SimdFloat a, SimdFloat b, SimdFloat c)
 242 {
 243     return { vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 244 }
 245
 246 static inline SimdFloat gmx_simdcall fnms(SimdFloat a, SimdFloat b, SimdFloat c)
 247 {
 248     return { vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 249 }
 250
 251 static inline SimdFloat gmx_simdcall rsqrt(SimdFloat x)
 252 {
 253     return { vec_rsqrte(x.simdInternal_) };
 254 }
 255
 256 static inline SimdFloat gmx_simdcall rcp(SimdFloat x)
 257 {
 258     return { vec_re(x.simdInternal_) };
 259 }
 260
 261 static inline SimdFloat gmx_simdcall maskAdd(SimdFloat a, SimdFloat b, SimdFBool m)
 262 {
 263     return { vec_add(a.simdInternal_,
 264                      vec_and(b.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_))) };
 265 }
 266
 267 static inline SimdFloat gmx_simdcall maskzMul(SimdFloat a, SimdFloat b, SimdFBool m)
 268 {
 269     SimdFloat prod = a * b;
 270
 271     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 272 }
 273
 274 static inline SimdFloat gmx_simdcall maskzFma(SimdFloat a, SimdFloat b, SimdFloat c, SimdFBool m)
 275 {
 276     SimdFloat prod = fma(a, b, c);
 277
 278     return { vec_and(prod.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 279 }
 280
 281 static inline SimdFloat gmx_simdcall maskzRsqrt(SimdFloat x, SimdFBool m)
 282 {
 283 #ifndef NDEBUG
 284     x.simdInternal_ = vec_sel(vec_splats(1.0F), x.simdInternal_, m.simdInternal_);
 285 #endif
 286     return { vec_and(vec_rsqrte(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
 287 }
 288
 289 static inline SimdFloat gmx_simdcall maskzRcp(SimdFloat x, SimdFBool m)
 290 {
 291 #ifndef NDEBUG
 292     x.simdInternal_ = vec_sel(vec_splats(1.0F), x.simdInternal_, m.simdInternal_);
 293 #endif
 294     return { vec_and(vec_re(x.simdInternal_), reinterpret_cast<__vector float>(m.simdInternal_)) };
 295 }
 296
 297 static inline SimdFloat gmx_simdcall abs(SimdFloat x)
 298 {
 299     return { vec_abs(x.simdInternal_) };
 300 }
 301
 302 static inline SimdFloat gmx_simdcall max(SimdFloat a, SimdFloat b)
 303 {
 304     return { vec_max(a.simdInternal_, b.simdInternal_) };
 305 }
 306
 307 static inline SimdFloat gmx_simdcall min(SimdFloat a, SimdFloat b)
 308 {
 309     return { vec_min(a.simdInternal_, b.simdInternal_) };
 310 }
 311
 312 static inline SimdFloat gmx_simdcall round(SimdFloat x)
 313 {
 314     return { vec_round(x.simdInternal_) };
 315 }
 316
 317 static inline SimdFloat gmx_simdcall trunc(SimdFloat x)
 318 {
 319     return { vec_trunc(x.simdInternal_) };
 320 }
 321
 322 static inline SimdFloat gmx_simdcall frexp(SimdFloat value, SimdFInt32* exponent)
 323 {
 324     const __vector float exponentMask = reinterpret_cast<__vector float>(vec_splats(0x7F800000U));
 325     const __vector signed int exponentBias = vec_splats(126);
 326     const __vector float      half         = vec_splats(0.5F);
 327     __vector signed int       iExponent;
 328
 329     iExponent = reinterpret_cast<__vector signed int>(vec_and(value.simdInternal_, exponentMask));
 330     iExponent = vec_sub(vec_sr(iExponent, vec_splats(23U)), exponentBias);
 331     exponent->simdInternal_ = iExponent;
 332
 333     return { vec_or(vec_andc(value.simdInternal_, exponentMask), half) };
 334 }
 335
 336 template<MathOptimization opt = MathOptimization::Safe>
 337 static inline SimdFloat gmx_simdcall ldexp(SimdFloat value, SimdFInt32 exponent)
 338 {
 339     const __vector signed int exponentBias = vec_splats(127);
 340     __vector signed int       iExponent;
 341
 342     iExponent = vec_add(exponent.simdInternal_, exponentBias);
 343
 344     if (opt == MathOptimization::Safe)
 345     {
 346         // Make sure biased argument is not negative
 347         iExponent = vec_max(iExponent, vec_splat_s32(0));
 348     }
 349
 350     iExponent = vec_sl(iExponent, vec_splats(23U));
 351
 352     return { vec_mul(value.simdInternal_, reinterpret_cast<__vector float>(iExponent)) };
 353 }
 354
 355 static inline float gmx_simdcall reduce(SimdFloat x)
 356 {
 357     const __vector unsigned char perm1 = { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 };
 358     const __vector unsigned char perm2 = { 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3 };
 359
 360     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm1));
 361     x.simdInternal_ = vec_add(x.simdInternal_, vec_perm(x.simdInternal_, x.simdInternal_, perm2));
 362     return vec_extract(x.simdInternal_, 0);
 363 }
 364
 365 static inline SimdFBool gmx_simdcall operator==(SimdFloat a, SimdFloat b)
 366 {
 367     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 368 }
 369
 370 static inline SimdFBool gmx_simdcall operator!=(SimdFloat a, SimdFloat b)
 371 {
 372     return { vec_or(vec_cmpgt(a.simdInternal_, b.simdInternal_),
 373                     vec_cmplt(a.simdInternal_, b.simdInternal_)) };
 374 }
 375
 376 static inline SimdFBool gmx_simdcall operator<(SimdFloat a, SimdFloat b)
 377 {
 378     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 379 }
 380
 381 static inline SimdFBool gmx_simdcall operator<=(SimdFloat a, SimdFloat b)
 382 {
 383     return { vec_cmple(a.simdInternal_, b.simdInternal_) };
 384 }
 385
 386 static inline SimdFBool gmx_simdcall testBits(SimdFloat a)
 387 {
 388     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
 389 }
 390
 391 static inline SimdFBool gmx_simdcall operator&&(SimdFBool a, SimdFBool b)
 392 {
 393     return { vec_and(a.simdInternal_, b.simdInternal_) };
 394 }
 395
 396 static inline SimdFBool gmx_simdcall operator||(SimdFBool a, SimdFBool b)
 397 {
 398     return { vec_or(a.simdInternal_, b.simdInternal_) };
 399 }
 400
 401 static inline bool gmx_simdcall anyTrue(SimdFBool a)
 402 {
 403     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 404 }
 405
 406 static inline SimdFloat gmx_simdcall selectByMask(SimdFloat a, SimdFBool m)
 407 {
 408     return { vec_and(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 409 }
 410
 411 static inline SimdFloat gmx_simdcall selectByNotMask(SimdFloat a, SimdFBool m)
 412 {
 413     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector float>(m.simdInternal_)) };
 414 }
 415
 416 static inline SimdFloat gmx_simdcall blend(SimdFloat a, SimdFloat b, SimdFBool sel)
 417 {
 418     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 419 }
 420
 421 static inline SimdFInt32 gmx_simdcall operator&(SimdFInt32 a, SimdFInt32 b)
 422 {
 423     return { vec_and(a.simdInternal_, b.simdInternal_) };
 424 }
 425
 426 static inline SimdFInt32 gmx_simdcall andNot(SimdFInt32 a, SimdFInt32 b)
 427 {
 428     return { vec_andc(b.simdInternal_, a.simdInternal_) };
 429 }
 430
 431 static inline SimdFInt32 gmx_simdcall operator|(SimdFInt32 a, SimdFInt32 b)
 432 {
 433     return { vec_or(a.simdInternal_, b.simdInternal_) };
 434 }
 435
 436 static inline SimdFInt32 gmx_simdcall operator^(SimdFInt32 a, SimdFInt32 b)
 437 {
 438     return { vec_xor(a.simdInternal_, b.simdInternal_) };
 439 }
 440
 441 static inline SimdFInt32 gmx_simdcall operator+(SimdFInt32 a, SimdFInt32 b)
 442 {
 443     return { vec_add(a.simdInternal_, b.simdInternal_) };
 444 }
 445
 446 static inline SimdFInt32 gmx_simdcall operator-(SimdFInt32 a, SimdFInt32 b)
 447 {
 448     return { vec_sub(a.simdInternal_, b.simdInternal_) };
 449 }
 450
 451 static inline SimdFInt32 gmx_simdcall operator*(SimdFInt32 a, SimdFInt32 b)
 452 {
 453     return { a.simdInternal_ * b.simdInternal_ };
 454 }
 455
 456 static inline SimdFIBool gmx_simdcall operator==(SimdFInt32 a, SimdFInt32 b)
 457 {
 458     return { vec_cmpeq(a.simdInternal_, b.simdInternal_) };
 459 }
 460
 461 static inline SimdFIBool gmx_simdcall testBits(SimdFInt32 a)
 462 {
 463     return { vec_cmpgt(reinterpret_cast<__vector unsigned int>(a.simdInternal_), vec_splats(0U)) };
 464 }
 465
 466 static inline SimdFIBool gmx_simdcall operator<(SimdFInt32 a, SimdFInt32 b)
 467 {
 468     return { vec_cmplt(a.simdInternal_, b.simdInternal_) };
 469 }
 470
 471 static inline SimdFIBool gmx_simdcall operator&&(SimdFIBool a, SimdFIBool b)
 472 {
 473     return { vec_and(a.simdInternal_, b.simdInternal_) };
 474 }
 475
 476 static inline SimdFIBool gmx_simdcall operator||(SimdFIBool a, SimdFIBool b)
 477 {
 478     return { vec_or(a.simdInternal_, b.simdInternal_) };
 479 }
 480
 481 static inline bool gmx_simdcall anyTrue(SimdFIBool a)
 482 {
 483     return vec_any_ne(a.simdInternal_, reinterpret_cast<__vector vsxBool int>(vec_splats(0)));
 484 }
 485
 486 static inline SimdFInt32 gmx_simdcall selectByMask(SimdFInt32 a, SimdFIBool m)
 487 {
 488     return { vec_and(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 489 }
 490
 491 static inline SimdFInt32 gmx_simdcall selectByNotMask(SimdFInt32 a, SimdFIBool m)
 492 {
 493     return { vec_andc(a.simdInternal_, reinterpret_cast<__vector signed int>(m.simdInternal_)) };
 494 }
 495
 496 static inline SimdFInt32 gmx_simdcall blend(SimdFInt32 a, SimdFInt32 b, SimdFIBool sel)
 497 {
 498     return { vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_) };
 499 }
 500
 501 static inline SimdFInt32 gmx_simdcall cvtR2I(SimdFloat a)
 502 {
 503     return { vec_cts(vec_round(a.simdInternal_), 0) };
 504 }
 505
 506 static inline SimdFInt32 gmx_simdcall cvttR2I(SimdFloat a)
 507 {
 508     return { vec_cts(a.simdInternal_, 0) };
 509 }
 510
 511 static inline SimdFloat gmx_simdcall cvtI2R(SimdFInt32 a)
 512 {
 513     return { vec_ctf(a.simdInternal_, 0) };
 514 }
 515
 516 static inline SimdFIBool gmx_simdcall cvtB2IB(SimdFBool a)
 517 {
 518     return { a.simdInternal_ };
 519 }
 520
 521 static inline SimdFBool gmx_simdcall cvtIB2B(SimdFIBool a)
 522 {
 523     return { a.simdInternal_ };
 524 }
 525
 526 static inline SimdFloat gmx_simdcall copysign(SimdFloat x, SimdFloat y)
 527 {
 528 #if defined(__GNUC__) && !defined(__ibmxl__) && !defined(__xlC__)
 529     __vector float res;
 530     __asm__("xvcpsgnsp %x0,%x1,%x2" : "=wf"(res) : "wf"(y.simdInternal_), "wf"(x.simdInternal_));
 531     return { res };
 532 #else
 533     return { vec_cpsgn(y.simdInternal_, x.simdInternal_) };
 534 #endif
 535 }
 536
 537 } // namespace gmx
 538
 539 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_SIMD_FLOAT_H