2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_DOUBLE_H
41 // Assert is buggy on xlc with high optimization, so we skip it for QPX
50 #include "gromacs/math/utilities.h"
51 #include "gromacs/utility/basedefinitions.h"
53 #include "impl_ibm_qpx_simd_float.h"
63 SimdDouble(double d) : simdInternal_(vec_splats(d)) {}
65 // Internal utility constructor to simplify return statements
66 SimdDouble(vector4double simd) : simdInternal_(simd) {}
68 vector4double simdInternal_;
76 SimdDInt32(std::int32_t i)
78 GMX_ALIGNED(int, GMX_SIMD_DINT32_WIDTH) idata[GMX_SIMD_DINT32_WIDTH];
80 simdInternal_ = vec_splat(vec_ldia(0, idata), 0);
83 // Internal utility constructor to simplify return statements
84 SimdDInt32(vector4double simd) : simdInternal_(simd) {}
86 vector4double simdInternal_;
94 SimdDBool(bool b) : simdInternal_(vec_splats(b ? 1.0 : -1.0)) {}
96 // Internal utility constructor to simplify return statements
97 SimdDBool(vector4double simd) : simdInternal_(simd) {}
99 vector4double simdInternal_;
102 static inline SimdDouble gmx_simdcall
103 simdLoad(const double *m, SimdDoubleTag = {})
107 vec_ld(0, const_cast<double *>(m))
111 vec_lda(0, const_cast<double *>(m))
116 static inline void gmx_simdcall
117 store(double *m, SimdDouble a)
120 vec_st(a.simdInternal_, 0, m);
122 vec_sta(a.simdInternal_, 0, m);
126 static inline SimdDouble gmx_simdcall
134 static inline SimdDInt32 gmx_simdcall
135 simdLoad(const std::int32_t * m, SimdDInt32Tag)
139 vec_ldia(0, const_cast<int *>(m))
143 vec_ldiaa(0, const_cast<int *>(m))
148 static inline void gmx_simdcall
149 store(std::int32_t * m, SimdDInt32 a)
151 vec_st(a.simdInternal_, 0, m);
154 static inline SimdDInt32 gmx_simdcall
162 static inline SimdDouble gmx_simdcall
163 operator+(SimdDouble a, SimdDouble b)
166 vec_add(a.simdInternal_, b.simdInternal_)
170 static inline SimdDouble gmx_simdcall
171 operator-(SimdDouble a, SimdDouble b)
174 vec_sub(a.simdInternal_, b.simdInternal_)
178 static inline SimdDouble gmx_simdcall
179 operator-(SimdDouble x)
182 vec_neg(x.simdInternal_)
186 static inline SimdDouble gmx_simdcall
187 operator*(SimdDouble a, SimdDouble b)
190 vec_mul(a.simdInternal_, b.simdInternal_)
194 static inline SimdDouble gmx_simdcall
195 fma(SimdDouble a, SimdDouble b, SimdDouble c)
198 vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
202 static inline SimdDouble gmx_simdcall
203 fms(SimdDouble a, SimdDouble b, SimdDouble c)
206 vec_msub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
210 static inline SimdDouble gmx_simdcall
211 fnma(SimdDouble a, SimdDouble b, SimdDouble c)
214 vec_nmsub(a.simdInternal_, b.simdInternal_, c.simdInternal_)
218 static inline SimdDouble gmx_simdcall
219 fnms(SimdDouble a, SimdDouble b, SimdDouble c)
222 vec_nmadd(a.simdInternal_, b.simdInternal_, c.simdInternal_)
226 static inline SimdDouble gmx_simdcall
230 vec_rsqrte(x.simdInternal_)
234 static inline SimdDouble gmx_simdcall
238 vec_re(x.simdInternal_)
242 static inline SimdDouble gmx_simdcall
243 maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
246 vec_add(a.simdInternal_, vec_sel(vec_splats(0.0), b.simdInternal_, m.simdInternal_))
250 static inline SimdDouble gmx_simdcall
251 maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
254 vec_sel(vec_splats(0.0), vec_mul(a.simdInternal_, b.simdInternal_), m.simdInternal_)
258 static inline SimdDouble
259 maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
262 vec_sel(vec_splats(0.0), vec_madd(a.simdInternal_, b.simdInternal_, c.simdInternal_), m.simdInternal_)
266 static inline SimdDouble
267 maskzRsqrt(SimdDouble x, SimdDBool m)
270 x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
273 vec_sel(vec_splats(0.0), vec_rsqrte(x.simdInternal_), m.simdInternal_)
277 static inline SimdDouble
278 maskzRcp(SimdDouble x, SimdDBool m)
281 x.simdInternal_ = vec_sel(vec_splats(1.0), x.simdInternal_, m.simdInternal_);
284 vec_sel(vec_splats(0.0), vec_re(x.simdInternal_), m.simdInternal_)
288 static inline SimdDouble gmx_simdcall
292 vec_abs( x.simdInternal_ )
296 static inline SimdDouble gmx_simdcall
297 max(SimdDouble a, SimdDouble b)
300 vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(a.simdInternal_, b.simdInternal_))
304 static inline SimdDouble gmx_simdcall
305 min(SimdDouble a, SimdDouble b)
308 vec_sel(b.simdInternal_, a.simdInternal_, vec_sub(b.simdInternal_, a.simdInternal_))
312 static inline SimdDouble gmx_simdcall
315 // Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
316 // here, since vec_round() does not adhere to the FP control
317 // word rounding scheme. We rely on float-to-float and float-to-integer
318 // rounding being the same for half-way values in a few algorithms.
320 vec_cfid(vec_ctid(x.simdInternal_))
324 static inline SimdDouble gmx_simdcall
328 vec_trunc(x.simdInternal_)
332 static inline SimdDouble
333 frexp(SimdDouble value, SimdDInt32 * exponent)
335 GMX_ALIGNED(double, GMX_SIMD_DOUBLE_WIDTH) rdata[GMX_SIMD_DOUBLE_WIDTH];
336 GMX_ALIGNED(int, GMX_SIMD_DOUBLE_WIDTH) idata[GMX_SIMD_DOUBLE_WIDTH];
338 vec_st(value.simdInternal_, 0, rdata);
340 for (std::size_t i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
342 rdata[i] = std::frexp(rdata[i], idata + i);
345 exponent->simdInternal_ = vec_ldia(0, idata);
346 value.simdInternal_ = vec_ld(0, rdata);
351 template <MathOptimization opt = MathOptimization::Safe>
352 static inline SimdDouble
353 ldexp(SimdDouble value, SimdDInt32 exponent)
355 GMX_ALIGNED(double, GMX_SIMD_DOUBLE_WIDTH) rdata[GMX_SIMD_DOUBLE_WIDTH];
356 GMX_ALIGNED(int, GMX_SIMD_DOUBLE_WIDTH) idata[GMX_SIMD_DOUBLE_WIDTH];
358 vec_st(value.simdInternal_, 0, rdata);
359 vec_st(exponent.simdInternal_, 0, idata);
361 for (std::size_t i = 0; i < GMX_SIMD_DOUBLE_WIDTH; i++)
363 rdata[i] = std::ldexp(rdata[i], idata[i]);
366 value.simdInternal_ = vec_ld(0, rdata);
371 static inline double gmx_simdcall
374 vector4double y = vec_sldw(x.simdInternal_, x.simdInternal_, 2);
377 y = vec_add(y, x.simdInternal_);
378 z = vec_sldw(y, y, 1);
380 return vec_extract(y, 0);
383 static inline SimdDBool gmx_simdcall
384 operator==(SimdDouble a, SimdDouble b)
387 vec_cmpeq(a.simdInternal_, b.simdInternal_)
391 static inline SimdDBool gmx_simdcall
392 operator!=(SimdDouble a, SimdDouble b)
395 vec_not(vec_cmpeq(a.simdInternal_, b.simdInternal_))
399 static inline SimdDBool gmx_simdcall
400 operator<(SimdDouble a, SimdDouble b)
403 vec_cmplt(a.simdInternal_, b.simdInternal_)
407 static inline SimdDBool gmx_simdcall
408 operator<=(SimdDouble a, SimdDouble b)
411 vec_or(vec_cmplt(a.simdInternal_, b.simdInternal_), vec_cmpeq(a.simdInternal_, b.simdInternal_))
415 static inline SimdDBool gmx_simdcall
416 operator&&(SimdDBool a, SimdDBool b)
419 vec_and(a.simdInternal_, b.simdInternal_)
423 static inline SimdDBool gmx_simdcall
424 operator||(SimdDBool a, SimdDBool b)
427 vec_or(a.simdInternal_, b.simdInternal_)
431 static inline bool gmx_simdcall
434 vector4double b = vec_sldw(a.simdInternal_, a.simdInternal_, 2);
436 a.simdInternal_ = vec_or(a.simdInternal_, b);
437 b = vec_sldw(a.simdInternal_, a.simdInternal_, 1);
438 b = vec_or(a.simdInternal_, b);
439 return (vec_extract(b, 0) > 0);
442 static inline SimdDouble gmx_simdcall
443 selectByMask(SimdDouble a, SimdDBool m)
446 vec_sel(vec_splats(0.0), a.simdInternal_, m.simdInternal_)
450 static inline SimdDouble gmx_simdcall
451 selectByNotMask(SimdDouble a, SimdDBool m)
454 vec_sel(a.simdInternal_, vec_splats(0.0), m.simdInternal_)
458 static inline SimdDouble gmx_simdcall
459 blend(SimdDouble a, SimdDouble b, SimdDBool sel)
462 vec_sel(a.simdInternal_, b.simdInternal_, sel.simdInternal_)
466 static inline SimdDInt32 gmx_simdcall
470 vec_ctiw(a.simdInternal_)
474 static inline SimdDInt32 gmx_simdcall
475 cvttR2I(SimdDouble a)
478 vec_ctiwz(a.simdInternal_)
482 static inline SimdDouble gmx_simdcall
486 vec_cfid(a.simdInternal_)
490 static inline SimdDouble gmx_simdcall
498 static inline SimdFloat gmx_simdcall
506 static inline SimdDouble gmx_simdcall
507 copysign(SimdDouble x, SimdDouble y)
510 vec_cpsgn(y.simdInternal_, x.simdInternal_)
516 #endif // GMX_SIMD_IMPLEMENTATION_IBM_QPX_SIMD_DOUBLE_H