src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_DOUBLE_H
  38
  39 #include "config.h"
  40
  41 // Assert is buggy on xlc with high optimization, so we skip it for QPX
  42 #include <cmath>
  43 #include <cstddef>
  44 #include <cstdint>
  45
  46 #ifdef __clang__
  47 #include <qpxmath.h>
  48 #endif
  49
  50 #include "gromacs/math/utilities.h"
  51 #include "gromacs/utility/basedefinitions.h"
  52
  53 #include "impl_ibm_qpx_simd_float.h"
  54
  55 namespace gmx
  56 {
  57
  58 class SimdDouble
  59 {
  60     public:
  61         SimdDouble() {}
  62
  63         SimdDouble(double d) : simdInternal_(vec_splats(d)) {}
  64
  65         // Internal utility constructor to simplify return statements
  66         SimdDouble(vector4double simd) : simdInternal_(simd) {}
  67
  68         vector4double  simdInternal_;
  69 };
  70
  71 class SimdDInt32
  72 {
  73     public:
  74         SimdDInt32() {}
  75
  76         SimdDInt32(std::int32_t i)
  77         {
  78             alignas(GMX_SIMD_ALIGNMENT) std::int32_t  idata[GMX_SIMD_DINT32_WIDTH];
  79             idata[0]      = i;
  80             simdInternal_ = vec_splat(vec_ldia(0, idata), 0);
  81         }
  82
  83         // Internal utility constructor to simplify return statements
  84         SimdDInt32(vector4double simd) : simdInternal_(simd) {}
  85
  86         vector4double  simdInternal_;
  87 };
  88
  89 class SimdDBool
  90 {
  91     public:
  92         SimdDBool() {}
  93
  94         SimdDBool(bool b) : simdInternal_(vec_splats(b ? 1.0 : -1.0)) {}
  95
  96         // Internal utility constructor to simplify return statements
  97         SimdDBool(vector4double simd) : simdInternal_(simd) {}
  98
  99         vector4double  simdInternal_;
 100 };
 101
 102 static inline SimdDouble gmx_simdcall
 103 simdLoad(const double *m, SimdDoubleTag = {})
 104 {
 105 #ifdef NDEBUG
 106     return {
 107                vec_ld(0, const_cast<double *>(m))
 108     };
 109 #else
 110     return {
 111                vec_lda(0, const_cast<double *>(m))
 112     };
 113 #endif
 114 }
 115
 116 static inline void gmx_simdcall
 117 store(double *m, SimdDouble a)
 118 {
 119 #ifdef NDEBUG
 120     vec_st(a.simdInternal_, 0, m);
 121 #else
 122     vec_sta(a.simdInternal_, 0, m);
 123 #endif
 124 }
 125
 126 static inline SimdDouble gmx_simdcall
 127 setZeroD()
 128 {
 129     return {
 130                vec_splats(0.0)
 131     };
 132 }
 133
 134 static inline SimdDInt32 gmx_simdcall
 135 simdLoad(const std::int32_t * m, SimdDInt32Tag)
 136 {
 137 #ifdef NDEBUG
 138     return {
 139                vec_ldia(0, const_cast<int *>(m))
 140     };
 141 #else
 142     return {
 143                vec_ldiaa(0, const_cast<int *>(m))
 144     };
 145 #endif
 146 }
 147
 148 static inline void gmx_simdcall
 149 store(std::int32_t * m, SimdDInt32 a)
 150 {
 151     vec_st(a.simdInternal_, 0, m);
 152 }
 153
 154 static inline SimdDInt32 gmx_simdcall
 155 setZeroDI()
 156 {
 157     return {
 158                vec_splats(0.0)
 159     };
 160 }
 161
 162 static inline SimdDouble gmx_simdcall
 163 operator+(SimdDouble a, SimdDouble b)
 164 {
 165     return {
 166                vec_add(a.simdInternal_, b.simdInternal_)
 167     };
 168 }
 169
 170 static inline SimdDouble gmx_simdcall
 171 operator-(SimdDouble a, SimdDouble b)
 172 {
 173     return {
 174                vec_sub(a.simdInternal_, b.simdInternal_)
 175     };
 176 }
 177
 178 static inline SimdDouble gmx_simdcall
 179 operator-(SimdDouble x)
 180 {
 181     return {
 182                vec_neg(x.simdInternal_)
 183     };
 184 }
 185
 186 static inline SimdDouble gmx_simdcall
 187 operator*(SimdDouble a, SimdDouble b)
 188 {
 189     return {
 190                vec_mul(a.simdInternal_, b.simdInternal_)
 191     };
 192 }
 193
 194 static inline SimdDouble gmx_simdcall
 195 fma(SimdDouble a, SimdDouble b, SimdDouble c)
 196 {
 197     return {
 198                vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 199     };
 200 }
 201
 202 static inline SimdDouble gmx_simdcall
 203 fms(SimdDouble a, SimdDouble b, SimdDouble c)
 204 {
 205     return {
 206                vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 207     };
 208 }
 209
 210 static inline SimdDouble gmx_simdcall
 211 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 212 {
 213     return {
 214                vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 215     };
 216 }
 217
 218 static inline SimdDouble gmx_simdcall
 219 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 220 {
 221     return {
 222                vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 223     };
 224 }
 225
 226 static inline SimdDouble gmx_simdcall
 227 rsqrt(SimdDouble x)
 228 {
 229     return {
 230                vec_rsqrte(x.simdInternal_)
 231     };
 232 }
 233
 234 static inline SimdDouble gmx_simdcall
 235 rcp(SimdDouble x)
 236 {
 237     return {
 238                vec_re(x.simdInternal_)
 239     };
 240 }
 241
 242 static inline SimdDouble gmx_simdcall
 243 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 244 {
 245     return {
 246                vec_add(a.simdInternal_, vec_sel(vec_splats(0.0), b.simdInternal_, m.simdInternal_))
 247     };
 248 }
 249
 250 static inline SimdDouble gmx_simdcall
 251 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 252 {
 253     return {
 254                vec_sel(vec_splats(0.0), vec_mul(a.simdInternal_, b.simdInternal_), m.simdInternal_)
 255     };
 256 }
 257
 258 static inline SimdDouble
 259 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 260 {
 261     return {
 262                vec_sel(vec_splats(0.0), vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_), m.simdInternal_)
 263     };
 264 }
 265
 266 static inline SimdDouble
 267 maskzRsqrt(SimdDouble x, SimdDBool m)
 268 {
 269 #ifndef NDEBUG
 270     x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
 271 #endif
 272     return {
 273                vec_sel(vec_splats(0.0), vec_rsqrte(x.simdInternal_), m.simdInternal_)
 274     };
 275 }
 276
 277 static inline SimdDouble
 278 maskzRcp(SimdDouble x, SimdDBool m)
 279 {
 280 #ifndef NDEBUG
 281     x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
 282 #endif
 283     return {
 284                vec_sel(vec_splats(0.0), vec_re(x.simdInternal_), m.simdInternal_)
 285     };
 286 }
 287
 288 static inline SimdDouble gmx_simdcall
 289 abs(SimdDouble x)
 290 {
 291     return {
 292                vec_abs( x.simdInternal_ )
 293     };
 294 }
 295
 296 static inline SimdDouble gmx_simdcall
 297 max(SimdDouble a, SimdDouble b)
 298 {
 299     return {
 300                vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(a.simdInternal_, b.simdInternal_))
 301     };
 302 }
 303
 304 static inline SimdDouble gmx_simdcall
 305 min(SimdDouble a, SimdDouble b)
 306 {
 307     return {
 308                vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(b.simdInternal_, a.simdInternal_))
 309     };
 310 }
 311
 312 static inline SimdDouble gmx_simdcall
 313 round(SimdDouble x)
 314 {
 315     // Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
 316     // here, since vec_round() does not adhere to the FP control
 317     // word rounding scheme. We rely on float-to-float and float-to-integer
 318     // rounding being the same for half-way values in a few algorithms.
 319     return {
 320                vec_cfid(vec_ctid(x.simdInternal_))
 321     };
 322 }
 323
 324 static inline SimdDouble gmx_simdcall
 325 trunc(SimdDouble x)
 326 {
 327     return {
 328                vec_trunc(x.simdInternal_)
 329     };
 330 }
 331
 332 static inline SimdDouble
 333 frexp(SimdDouble value, SimdDInt32 * exponent)
 334 {
 335     alignas(GMX_SIMD_ALIGNMENT) double        rdata[GMX_SIMD_DOUBLE_WIDTH];
 336     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  idata[GMX_SIMD_DOUBLE_WIDTH];
 337
 338     vec_st(value.simdInternal_, 0, rdata);
 339
 340     for (std::size_t i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
 341     {
 342         rdata[i] = std::frexp(rdata[i], idata + i);
 343     }
 344
 345     exponent->simdInternal_ = vec_ldia(0, idata);
 346     value.simdInternal_     = vec_ld(0, rdata);
 347
 348     return value;
 349 }
 350
 351 template <MathOptimization opt = MathOptimization::Safe>
 352 static inline SimdDouble
 353 ldexp(SimdDouble value, SimdDInt32 exponent)
 354 {
 355     alignas(GMX_SIMD_ALIGNMENT) double        rdata[GMX_SIMD_DOUBLE_WIDTH];
 356     alignas(GMX_SIMD_ALIGNMENT) std::int32_t  idata[GMX_SIMD_DOUBLE_WIDTH];
 357
 358     vec_st(value.simdInternal_,    0, rdata);
 359     vec_st(exponent.simdInternal_, 0, idata);
 360
 361     for (std::size_t i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
 362     {
 363         rdata[i] = std::ldexp(rdata[i], idata[i]);
 364     }
 365
 366     value.simdInternal_     = vec_ld(0, rdata);
 367
 368     return value;
 369 }
 370
 371 static inline double gmx_simdcall
 372 reduce(SimdDouble x)
 373 {
 374     vector4double y = vec_sldw(x.simdInternal_, x.simdInternal_, 2);
 375     vector4double z;
 376
 377     y = vec_add(y, x.simdInternal_);
 378     z = vec_sldw(y, y, 1);
 379     y = vec_add(y, z);
 380     return vec_extract(y, 0);
 381 }
 382
 383 static inline SimdDBool gmx_simdcall
 384 operator==(SimdDouble a, SimdDouble b)
 385 {
 386     return {
 387                vec_cmpeq(a.simdInternal_, b.simdInternal_)
 388     };
 389 }
 390
 391 static inline SimdDBool gmx_simdcall
 392 operator!=(SimdDouble a, SimdDouble b)
 393 {
 394     return {
 395                vec_not(vec_cmpeq(a.simdInternal_, b.simdInternal_))
 396     };
 397 }
 398
 399 static inline SimdDBool gmx_simdcall
 400 operator<(SimdDouble a, SimdDouble b)
 401 {
 402     return {
 403                vec_cmplt(a.simdInternal_, b.simdInternal_)
 404     };
 405 }
 406
 407 static inline SimdDBool gmx_simdcall
 408 operator<=(SimdDouble a, SimdDouble b)
 409 {
 410     return {
 411                vec_or(vec_cmplt(a.simdInternal_, b.simdInternal_), vec_cmpeq(a.simdInternal_, b.simdInternal_))
 412     };
 413 }
 414
 415 static inline SimdDBool gmx_simdcall
 416 operator&&(SimdDBool a, SimdDBool b)
 417 {
 418     return {
 419                vec_and(a.simdInternal_, b.simdInternal_)
 420     };
 421 }
 422
 423 static inline SimdDBool gmx_simdcall
 424 operator||(SimdDBool a, SimdDBool b)
 425 {
 426     return {
 427                vec_or(a.simdInternal_, b.simdInternal_)
 428     };
 429 }
 430
 431 static inline bool gmx_simdcall
 432 anyTrue(SimdDBool a)
 433 {
 434     vector4double b = vec_sldw(a.simdInternal_, a.simdInternal_, 2);
 435
 436     a.simdInternal_ = vec_or(a.simdInternal_, b);
 437     b               = vec_sldw(a.simdInternal_, a.simdInternal_, 1);
 438     b               = vec_or(a.simdInternal_, b);
 439     return (vec_extract(b, 0) > 0);
 440 }
 441
 442 static inline SimdDouble gmx_simdcall
 443 selectByMask(SimdDouble a, SimdDBool m)
 444 {
 445     return {
 446                vec_sel(vec_splats(0.0), a.simdInternal_, m.simdInternal_)
 447     };
 448 }
 449
 450 static inline SimdDouble gmx_simdcall
 451 selectByNotMask(SimdDouble a, SimdDBool m)
 452 {
 453     return {
 454                vec_sel(a.simdInternal_, vec_splats(0.0), m.simdInternal_)
 455     };
 456 }
 457
 458 static inline SimdDouble gmx_simdcall
 459 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
 460 {
 461     return {
 462                vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 463     };
 464 }
 465
 466 static inline SimdDInt32 gmx_simdcall
 467 cvtR2I(SimdDouble a)
 468 {
 469     return {
 470                vec_ctiw(a.simdInternal_)
 471     };
 472 }
 473
 474 static inline SimdDInt32 gmx_simdcall
 475 cvttR2I(SimdDouble a)
 476 {
 477     return {
 478                vec_ctiwz(a.simdInternal_)
 479     };
 480 }
 481
 482 static inline SimdDouble gmx_simdcall
 483 cvtI2R(SimdDInt32 a)
 484 {
 485     return {
 486                vec_cfid(a.simdInternal_)
 487     };
 488 }
 489
 490 static inline SimdDouble gmx_simdcall
 491 cvtF2D(SimdFloat f)
 492 {
 493     return {
 494                f.simdInternal_
 495     };
 496 }
 497
 498 static inline SimdFloat gmx_simdcall
 499 cvtD2F(SimdDouble d)
 500 {
 501     return {
 502                d.simdInternal_
 503     };
 504 }
 505
 506 static inline SimdDouble gmx_simdcall
 507 copysign(SimdDouble x, SimdDouble y)
 508 {
 509     return {
 510                vec_cpsgn(y.simdInternal_, x.simdInternal_)
 511     };
 512 }
 513
 514 }      // namespace gmx
 515
 516 #endif // GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_DOUBLE_H