src/gromacs/simd/impl_ibm_qpx/impl_ibm_qpx_simd4_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD4_DOUBLE_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD4_DOUBLE_H
  38
  39 #include "config.h"
  40
  41 #include "config.h"
  42
  43 // Assert is buggy on xlc with high optimization, so we skip it for QPX
  44 #include <cstddef>
  45
  46 #ifdef __clang__
  47 #include <qpxmath.h>
  48 #endif
  49
  50 namespace gmx
  51 {
  52
  53 class Simd4Double
  54 {
  55     public:
  56         Simd4Double() {}
  57
  58         Simd4Double(double d) : simdInternal_(vec_splats(d)) {}
  59
  60         // Internal utility constructor to simplify return statements
  61         Simd4Double(vector4double simd) : simdInternal_(simd) {}
  62
  63         vector4double  simdInternal_;
  64 };
  65
  66 class Simd4DBool
  67 {
  68     public:
  69         Simd4DBool() {}
  70
  71         //! \brief Construct from scalar bool
  72         Simd4DBool(bool b) : simdInternal_(vec_splats(b ? 1.0 : -1.0)) {}
  73
  74         // Internal utility constructor to simplify return statements
  75         Simd4DBool(vector4double simd) : simdInternal_(simd) {}
  76
  77         vector4double  simdInternal_;
  78 };
  79
  80 static inline Simd4Double gmx_simdcall
  81 load4(const double *m)
  82 {
  83 #ifdef NDEBUG
  84     return {
  85                vec_ld(0, const_cast<double *>(m))
  86     };
  87 #else
  88     return {
  89                vec_lda(0, const_cast<double *>(m))
  90     };
  91 #endif
  92 }
  93
  94 static inline void gmx_simdcall
  95 store4(double *m, Simd4Double a)
  96 {
  97 #ifdef NDEBUG
  98     vec_st(a.simdInternal_, 0, m);
  99 #else
 100     vec_sta(a.simdInternal_, 0, m);
 101 #endif
 102 }
 103
 104 static inline Simd4Double gmx_simdcall
 105 simd4SetZeroD()
 106 {
 107     return {
 108                vec_splats(0.0)
 109     };
 110 }
 111
 112 static inline Simd4Double gmx_simdcall
 113 operator+(Simd4Double a, Simd4Double b)
 114 {
 115     return {
 116                vec_add(a.simdInternal_, b.simdInternal_)
 117     };
 118 }
 119
 120 static inline Simd4Double gmx_simdcall
 121 operator-(Simd4Double a, Simd4Double b)
 122 {
 123     return {
 124                vec_sub(a.simdInternal_, b.simdInternal_)
 125     };
 126 }
 127
 128 static inline Simd4Double gmx_simdcall
 129 operator-(Simd4Double x)
 130 {
 131     return {
 132                vec_neg(x.simdInternal_)
 133     };
 134 }
 135
 136 static inline Simd4Double gmx_simdcall
 137 operator*(Simd4Double a, Simd4Double b)
 138 {
 139     return {
 140                vec_mul(a.simdInternal_, b.simdInternal_)
 141     };
 142 }
 143
 144 static inline Simd4Double gmx_simdcall
 145 fma(Simd4Double a, Simd4Double b, Simd4Double c)
 146 {
 147     return {
 148                vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 149     };
 150 }
 151
 152 static inline Simd4Double gmx_simdcall
 153 fms(Simd4Double a, Simd4Double b, Simd4Double c)
 154 {
 155     return {
 156                vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 157     };
 158 }
 159
 160 static inline Simd4Double gmx_simdcall
 161 fnma(Simd4Double a, Simd4Double b, Simd4Double c)
 162 {
 163     return {
 164                vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 165     };
 166 }
 167
 168 static inline Simd4Double gmx_simdcall
 169 fnms(Simd4Double a, Simd4Double b, Simd4Double c)
 170 {
 171     return {
 172                vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
 173     };
 174 }
 175
 176 static inline Simd4Double gmx_simdcall
 177 rsqrt(Simd4Double x)
 178 {
 179     return {
 180                vec_rsqrte(x.simdInternal_)
 181     };
 182 }
 183
 184 static inline Simd4Double gmx_simdcall
 185 abs(Simd4Double x)
 186 {
 187     return {
 188                vec_abs( x.simdInternal_ )
 189     };
 190 }
 191
 192 static inline Simd4Double gmx_simdcall
 193 max(Simd4Double a, Simd4Double b)
 194 {
 195     return {
 196                vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(a.simdInternal_, b.simdInternal_))
 197     };
 198 }
 199
 200 static inline Simd4Double gmx_simdcall
 201 min(Simd4Double a, Simd4Double b)
 202 {
 203     return {
 204                vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(b.simdInternal_, a.simdInternal_))
 205     };
 206 }
 207
 208 static inline Simd4Double gmx_simdcall
 209 round(Simd4Double x)
 210 {
 211     // Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
 212     // here, since vec_round() does not adhere to the FP control
 213     // word rounding scheme. We rely on float-to-float and float-to-integer
 214     // rounding being the same for half-way values in a few algorithms.
 215
 216     return {
 217                vec_cfid(vec_ctid(x.simdInternal_))
 218     };
 219 }
 220
 221 static inline Simd4Double gmx_simdcall
 222 trunc(Simd4Double x)
 223 {
 224     return {
 225                vec_trunc(x.simdInternal_)
 226     };
 227 }
 228
 229 static inline double gmx_simdcall
 230 dotProduct(Simd4Double a, Simd4Double b)
 231 {
 232     vector4double dp_sh0 = vec_mul(a.simdInternal_, b.simdInternal_);
 233     vector4double dp_sh1 = vec_sldw(dp_sh0, dp_sh0, 1);
 234     vector4double dp_sh2 = vec_sldw(dp_sh0, dp_sh0, 2);
 235     vector4double dp     = vec_add(dp_sh2, vec_add(dp_sh0, dp_sh1));
 236
 237     return vec_extract(dp, 0);
 238 }
 239
 240 static inline void gmx_simdcall
 241 transpose(Simd4Double * v0, Simd4Double * v1,
 242           Simd4Double * v2, Simd4Double * v3)
 243 {
 244     vector4double t0 = vec_perm(v0->simdInternal_, v2->simdInternal_, vec_gpci(00415));
 245     vector4double t1 = vec_perm(v0->simdInternal_, v2->simdInternal_, vec_gpci(02637));
 246     vector4double t2 = vec_perm(v1->simdInternal_, v3->simdInternal_, vec_gpci(00415));
 247     vector4double t3 = vec_perm(v1->simdInternal_, v3->simdInternal_, vec_gpci(02637));
 248     v0->simdInternal_ = vec_perm(t0, t2, vec_gpci(00415));
 249     v1->simdInternal_ = vec_perm(t0, t2, vec_gpci(02637));
 250     v2->simdInternal_ = vec_perm(t1, t3, vec_gpci(00415));
 251     v3->simdInternal_ = vec_perm(t1, t3, vec_gpci(02637));
 252 }
 253
 254 static inline Simd4DBool gmx_simdcall
 255 operator==(Simd4Double a, Simd4Double b)
 256 {
 257     return {
 258                vec_cmpeq(a.simdInternal_, b.simdInternal_)
 259     };
 260 }
 261
 262 static inline Simd4DBool gmx_simdcall
 263 operator!=(Simd4Double a, Simd4Double b)
 264 {
 265     return {
 266                vec_not(vec_cmpeq(a.simdInternal_, b.simdInternal_))
 267     };
 268 }
 269
 270 static inline Simd4DBool gmx_simdcall
 271 operator<(Simd4Double a, Simd4Double b)
 272 {
 273     return {
 274                vec_cmplt(a.simdInternal_, b.simdInternal_)
 275     };
 276 }
 277
 278 static inline Simd4DBool gmx_simdcall
 279 operator<=(Simd4Double a, Simd4Double b)
 280 {
 281     return {
 282                vec_or(vec_cmplt(a.simdInternal_, b.simdInternal_), vec_cmpeq(a.simdInternal_, b.simdInternal_))
 283     };
 284 }
 285
 286 static inline Simd4DBool gmx_simdcall
 287 operator&&(Simd4DBool a, Simd4DBool b)
 288 {
 289     return {
 290                vec_and(a.simdInternal_, b.simdInternal_)
 291     };
 292 }
 293
 294 static inline Simd4DBool gmx_simdcall
 295 operator||(Simd4DBool a, Simd4DBool b)
 296 {
 297     return {
 298                vec_or(a.simdInternal_, b.simdInternal_)
 299     };
 300 }
 301
 302 static inline bool gmx_simdcall
 303 anyTrue(Simd4DBool a)
 304 {
 305     vector4double b = vec_sldw(a.simdInternal_, a.simdInternal_, 2);
 306
 307     a.simdInternal_ = vec_or(a.simdInternal_, b);
 308     b               = vec_sldw(a.simdInternal_, a.simdInternal_, 1);
 309     b               = vec_or(a.simdInternal_, b);
 310     return (vec_extract(b, 0) > 0);
 311 }
 312
 313 static inline Simd4Double gmx_simdcall
 314 selectByMask(Simd4Double a, Simd4DBool m)
 315 {
 316     return {
 317                vec_sel(vec_splats(0.0), a.simdInternal_, m.simdInternal_)
 318     };
 319 }
 320
 321 static inline Simd4Double gmx_simdcall
 322 selectByNotMask(Simd4Double a, Simd4DBool m)
 323 {
 324     return {
 325                vec_sel(a.simdInternal_, vec_splats(0.0), m.simdInternal_)
 326     };
 327 }
 328
 329 static inline Simd4Double gmx_simdcall
 330 blend(Simd4Double a, Simd4Double b, Simd4DBool sel)
 331 {
 332     return {
 333                vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
 334     };
 335 }
 336
 337 static inline double gmx_simdcall
 338 reduce(Simd4Double x)
 339 {
 340     vector4double y = vec_sldw(x.simdInternal_, x.simdInternal_, 2);
 341     vector4double z;
 342
 343     y = vec_add(y, x.simdInternal_);
 344     z = vec_sldw(y, y, 1);
 345     y = vec_add(y, z);
 346     return vec_extract(y, 0);
 347 }
 348
 349 }      // namespace gmx
 350
 351 #endif // GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD4_DOUBLE_H