2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
38 * armv8+sve support to GROMACS was contributed by the Research Organization for
39 * Information Science and Technology (RIST).
42 #ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
43 #define GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
53 #include "gromacs/math/utilities.h"
55 #include "impl_arm_sve_simd_float.h"
57 #define SVE_DOUBLE_MASK svptrue_b64()
58 #define SVE_DINT32_MASK svptrue_b64()
68 SimdDouble(const double d) { this->simdInternal_ = svdup_f64(d); }
70 SimdDouble(svfloat64_t simd) : simdInternal_(simd) {}
72 float64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
80 SimdDInt32(const int32_t i) { this->simdInternal_ = svdup_s64(i); }
82 SimdDInt32(svint64_t simd) : simdInternal_(simd) {}
84 int64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
92 SimdDBool(const bool b)
94 this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
97 SimdDBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
99 SimdDBool(svuint64_t simd) : simdInternal_(simd) {}
101 uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
109 SimdDIBool(const bool b)
111 this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
114 SimdDIBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
116 SimdDIBool(svuint64_t simd) : simdInternal_(simd) {}
118 uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
121 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
123 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
124 svbool_t pg = SVE_DOUBLE_MASK;
125 return { svld1_f64(pg, m) };
128 static inline SimdDouble gmx_simdcall simdLoad(SimdDouble* m, int offset, SimdDoubleTag = {})
130 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
131 svbool_t pg = SVE_DOUBLE_MASK;
132 return { svld1_f64(pg, reinterpret_cast<double*>(m) + offset * svcntd()) };
135 static inline SimdDouble gmx_simdcall simdLoadDouble(const double* m)
137 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
138 svbool_t pg = SVE_DOUBLE_MASK;
139 return { svld1_f64(pg, m) };
142 static inline void gmx_simdcall store(double* m, SimdDouble a)
144 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
145 svbool_t pg = SVE_DOUBLE_MASK;
146 svst1_f64(pg, m, a.simdInternal_);
149 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
151 svbool_t pg = SVE_DOUBLE_MASK;
152 return { svld1_f64(pg, m) };
155 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
157 svbool_t pg = SVE_DOUBLE_MASK;
158 svst1_f64(pg, m, a.simdInternal_);
161 static inline SimdDouble gmx_simdcall setZeroD()
163 return { svdup_f64(0.0) };
166 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
168 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
169 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
170 return { svunpklo_s64(svld1_s32(pg, m)) };
173 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
175 assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
176 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
177 svst1_s32(pg, m, svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
180 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
182 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
183 return { svunpklo_s64(svld1_s32(pg, m)) };
186 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
188 svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
189 svst1_s32(pg, m, svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
192 static inline SimdDInt32 gmx_simdcall setZeroDI()
194 return { svdup_s64(0) };
198 gmx_simdcall static inline std::int32_t extract(SimdDInt32 a)
200 svbool_t pg = svwhilelt_b64(0, index);
201 return svlasta_s64(pg, a.simdInternal_);
205 gmx_simdcall static inline double extract(SimdDouble a)
207 svbool_t pg = svwhilelt_b64(0, index);
208 return svlasta_f64(pg, a.simdInternal_);
211 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
213 svbool_t pg = svptrue_b64();
214 return { svreinterpret_f64_s64(svand_s64_x(
215 pg, svreinterpret_s64_f64(a.simdInternal_), svreinterpret_s64_f64(b.simdInternal_))) };
218 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
220 svbool_t pg = svptrue_b64();
221 return { svreinterpret_f64_s64(svbic_s64_x(
222 pg, svreinterpret_s64_f64(b.simdInternal_), svreinterpret_s64_f64(a.simdInternal_))) };
225 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
227 svbool_t pg = svptrue_b64();
228 return { svreinterpret_f64_s64(svorr_s64_x(
229 pg, svreinterpret_s64_f64(a.simdInternal_), svreinterpret_s64_f64(b.simdInternal_))) };
232 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
234 svbool_t pg = svptrue_b64();
235 return { svreinterpret_f64_s64(sveor_s64_x(
236 pg, svreinterpret_s64_f64(a.simdInternal_), svreinterpret_s64_f64(b.simdInternal_))) };
239 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
241 svbool_t pg = svptrue_b64();
242 return { svadd_f64_x(pg, a.simdInternal_, b.simdInternal_) };
245 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
247 svbool_t pg = svptrue_b64();
248 return { svsub_f64_x(pg, a.simdInternal_, b.simdInternal_) };
251 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
253 svbool_t pg = svptrue_b64();
254 return { svneg_f64_x(pg, a.simdInternal_) };
257 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
259 svbool_t pg = svptrue_b64();
260 return { svmul_f64_x(pg, a.simdInternal_, b.simdInternal_) };
263 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
265 svbool_t pg = svptrue_b64();
266 return { svmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
269 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
271 svbool_t pg = svptrue_b64();
272 return { svnmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
275 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
277 svbool_t pg = svptrue_b64();
278 return { svmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
281 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
283 svbool_t pg = svptrue_b64();
284 return { svnmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
287 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
289 return { svrsqrte_f64(x.simdInternal_) };
292 // The SIMD implementation seems to overflow when we square lu for
293 // values close to FLOAT_MAX, so we fall back on the version in
294 // simd_math.h, which is probably slightly slower.
295 #if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE
296 static inline SimdDouble gmx_simdcall rsqrtIter(SimdDouble lu, SimdDouble x)
298 return { vmulq_f64(lu.simdInternal_,
299 vrsqrtsq_f32(vmulq_f32(lu.simdInternal_, lu.simdInternal_), x.simdInternal_)) };
303 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
305 return { svrecpe_f64(x.simdInternal_) };
308 static inline SimdDouble gmx_simdcall rcpIter(SimdDouble lu, SimdDouble x)
310 svbool_t pg = svptrue_b64();
311 return { svmul_f64_x(pg, lu.simdInternal_, svrecps_f64(lu.simdInternal_, x.simdInternal_)) };
314 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
316 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
317 return { svadd_f64_m(pg, a.simdInternal_, b.simdInternal_) };
320 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
322 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
323 return { svmul_f64_z(pg, a.simdInternal_, b.simdInternal_) };
326 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
328 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
329 return { svmad_f64_z(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
332 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
334 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
335 // The result will always be correct since we mask the result with m, but
336 // for debug builds we also want to make sure not to generate FP exceptions
338 x.simdInternal_ = svsel_f64(pg, x.simdInternal_, svdup_n_f64(1.0));
340 return { svreinterpret_f64_u64(svand_n_u64_z(
341 pg, svreinterpret_u64_f64(svrsqrte_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
344 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
346 svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
347 // The result will always be correct since we mask the result with m, but
348 // for debug builds we also want to make sure not to generate FP exceptions
350 x.simdInternal_ = svsel_f64(m, x.simdInternal_, svdup_n_f64(1.0));
352 return { svreinterpret_f64_u64(svand_n_u64_z(
353 pg, svreinterpret_u64_f64(svrecpe_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
356 static inline SimdDouble gmx_simdcall abs(SimdDouble x)
358 svbool_t pg = svptrue_b64();
359 return { svabs_f64_x(pg, x.simdInternal_) };
362 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
364 svbool_t pg = svptrue_b64();
365 return { svmax_f64_x(pg, a.simdInternal_, b.simdInternal_) };
368 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
370 svbool_t pg = svptrue_b64();
371 return { svmin_f64_x(pg, a.simdInternal_, b.simdInternal_) };
374 // Round and trunc operations are defined at the end of this file, since they
375 // need to use double-to-integer and integer-to-double conversions.
377 template<MathOptimization opt = MathOptimization::Safe>
378 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
380 svbool_t pg = svptrue_b64();
381 const svint64_t exponentMask = svdup_n_s64(0x7FF0000000000000LL);
382 const svint64_t mantissaMask = svdup_n_s64(0x800FFFFFFFFFFFFFLL);
383 const svint64_t exponentBias = svdup_n_s64(1022LL); // add 1 to make our definition identical to frexp()
384 const svfloat64_t half = svdup_n_f64(0.5);
387 iExponent = svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), exponentMask);
388 // iExponent = svsub_s64_x(pg, svlsr_n_s64_x(pg, iExponent, 52), exponentBias);
389 iExponent = svsub_s64_x(
390 pg, svreinterpret_s64_u64(svlsr_n_u64_x(pg, svreinterpret_u64_s64(iExponent), 52)), exponentBias);
392 exponent->simdInternal_ = iExponent;
394 return { svreinterpret_f64_s64(
396 svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), mantissaMask),
397 svreinterpret_s64_f64(half))) };
400 template<MathOptimization opt = MathOptimization::Safe>
401 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
403 svbool_t pg = svptrue_b64();
404 const svint64_t exponentBias = svdup_n_s64(1023);
405 svint64_t iExponent = svadd_s64_x(pg, exponent.simdInternal_, exponentBias);
407 if (opt == MathOptimization::Safe)
409 // Make sure biased argument is not negative
410 iExponent = svmax_n_s64_x(pg, iExponent, 0);
413 iExponent = svlsl_n_s64_x(pg, iExponent, 52);
415 return { svmul_f64_x(pg, value.simdInternal_, svreinterpret_f64_s64(iExponent)) };
418 static inline double gmx_simdcall reduce(SimdDouble a)
420 svbool_t pg = svptrue_b64();
421 return svadda_f64(pg, 0.0f, a.simdInternal_);
424 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
426 svbool_t pg = svptrue_b64();
427 return { svcmpeq_f64(pg, a.simdInternal_, b.simdInternal_) };
430 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
432 svbool_t pg = svptrue_b64();
433 return { svcmpne_f64(pg, a.simdInternal_, b.simdInternal_) };
436 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
438 svbool_t pg = svptrue_b64();
439 return { svcmplt_f64(pg, a.simdInternal_, b.simdInternal_) };
442 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
444 svbool_t pg = svptrue_b64();
445 return { svcmple_f64(pg, a.simdInternal_, b.simdInternal_) };
448 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
450 svbool_t pg = svptrue_b64();
451 return { svcmpne_n_s64(pg, svreinterpret_s64_f64(a.simdInternal_), 0) };
454 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
456 svbool_t pg = svptrue_b64();
457 return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
460 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
462 svbool_t pg = svptrue_b64();
463 return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
466 static inline bool gmx_simdcall anyTrue(SimdDBool a)
468 svbool_t pg = svptrue_b64();
469 return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
472 static inline bool gmx_simdcall extractFirst(SimdDBool a)
474 svbool_t pg = svptrue_b64();
475 return svptest_first(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
478 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
480 svbool_t pg = svptrue_b64();
481 return { svreinterpret_f64_u64(svand_u64_x(pg, svreinterpret_u64_f64(a.simdInternal_), m.simdInternal_)) };
484 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
486 svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
487 return { svsel_f64(pg, a.simdInternal_, svdup_f64(0.0f)) };
490 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
492 svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
493 return { svsel_f64(pg, b.simdInternal_, a.simdInternal_) };
496 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
498 svbool_t pg = svptrue_b64();
499 return { svand_s64_x(pg, a.simdInternal_, b.simdInternal_) };
502 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
504 svbool_t pg = svptrue_b64();
505 return { svbic_s64_x(pg, b.simdInternal_, a.simdInternal_) };
508 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
510 svbool_t pg = svptrue_b64();
511 return { svorr_s64_x(pg, a.simdInternal_, b.simdInternal_) };
514 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
516 svbool_t pg = svptrue_b64();
517 return { sveor_s64_x(pg, a.simdInternal_, b.simdInternal_) };
520 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
522 svbool_t pg = svptrue_b64();
523 return { svadd_s64_x(pg, a.simdInternal_, b.simdInternal_) };
526 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
528 svbool_t pg = svptrue_b64();
529 return { svsub_s64_x(pg, a.simdInternal_, b.simdInternal_) };
532 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
534 svbool_t pg = svptrue_b64();
535 return { svmul_s64_x(pg, a.simdInternal_, b.simdInternal_) };
538 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
540 svbool_t pg = svptrue_b64();
541 return { svcmpeq_s64(pg, a.simdInternal_, b.simdInternal_) };
544 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
546 svbool_t pg = svptrue_b64();
547 return { svcmpne_n_s64(pg, a.simdInternal_, (int64_t)0) };
550 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
552 svbool_t pg = svptrue_b64();
553 return { svcmplt_s64(pg, a.simdInternal_, b.simdInternal_) };
556 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
558 svbool_t pg = svptrue_b64();
559 return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
562 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
564 svbool_t pg = svptrue_b64();
565 return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
568 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
570 svbool_t pg = svptrue_b64();
571 return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
574 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
576 svbool_t pg = svptrue_b64();
577 return { svand_s64_x(pg, a.simdInternal_, svreinterpret_s64_u64(m.simdInternal_)) };
580 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
582 svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
583 return { svadd_n_s64_z(pg, a.simdInternal_, 0) };
586 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
588 svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
589 return { svsel_s64(pg, b.simdInternal_, a.simdInternal_) };
592 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
594 svbool_t pg = svptrue_b64();
595 return { svcvt_s64_x(pg, svrinta_f64_x(pg, a.simdInternal_)) };
598 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
601 svbool_t pg = svptrue_b64();
602 return { svcvt_s64_x(pg, a.simdInternal_) };
605 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
607 svbool_t pg = svptrue_b64();
608 return { svcvt_f64_x(pg, a.simdInternal_) };
611 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
613 return { a.simdInternal_ };
616 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
618 return { a.simdInternal_ };
621 static inline SimdDouble gmx_simdcall round(SimdDouble x)
623 svbool_t pg = svptrue_b64();
624 return { svrinta_f64_x(pg, x.simdInternal_) };
627 static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
629 return cvtI2R(cvttR2I(x));
632 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
633 SimdDouble gmx_unused* d0,
634 SimdDouble gmx_unused* d1)
636 assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
637 svbool_t pg = svptrue_b32();
638 d0->simdInternal_ = svcvt_f64_f32_x(pg, svzip1(f.simdInternal_, f.simdInternal_));
639 d1->simdInternal_ = svcvt_f64_f32_x(pg, svzip2(f.simdInternal_, f.simdInternal_));
642 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
644 svbool_t pg = svptrue_b64();
645 assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
646 return { svuzp1_f32(svcvt_f32_f64_x(pg, d0.simdInternal_), svcvt_f32_f64_x(pg, d1.simdInternal_)) };
651 #endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H