src/gromacs/simd/impl_arm_sve/impl_arm_sve_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2020 Research Organization for Information Science and Technology (RIST).
   5  * Copyright (c) 2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36
  37 /*
  38  * armv8+sve support to GROMACS was contributed by the Research Organization for
  39  * Information Science and Technology (RIST).
  40  */
  41
  42 #ifndef GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
  43 #define GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H
  44
  45 #include "config.h"
  46
  47 #include <cassert>
  48 #include <cstddef>
  49 #include <cstdint>
  50
  51 #include <arm_sve.h>
  52
  53 #include "gromacs/math/utilities.h"
  54
  55 #include "impl_arm_sve_simd_float.h"
  56
  57 #define SVE_DOUBLE_MASK svptrue_b64()
  58 #define SVE_DINT32_MASK svptrue_b64()
  59
  60 namespace gmx
  61 {
  62
  63 class SimdDouble
  64 {
  65 public:
  66     SimdDouble() {}
  67
  68     SimdDouble(const double d) { this->simdInternal_ = svdup_f64(d); }
  69
  70     SimdDouble(svfloat64_t simd) : simdInternal_(simd) {}
  71
  72     float64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  73 };
  74
  75 class SimdDInt32
  76 {
  77 public:
  78     SimdDInt32() {}
  79
  80     SimdDInt32(const int32_t i) { this->simdInternal_ = svdup_s64(i); }
  81
  82     SimdDInt32(svint64_t simd) : simdInternal_(simd) {}
  83
  84     int64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
  85 };
  86
  87 class SimdDBool
  88 {
  89 public:
  90     SimdDBool() {}
  91
  92     SimdDBool(const bool b)
  93     {
  94         this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
  95     }
  96
  97     SimdDBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
  98
  99     SimdDBool(svuint64_t simd) : simdInternal_(simd) {}
 100
 101     uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
 102 };
 103
 104 class SimdDIBool
 105 {
 106 public:
 107     SimdDIBool() {}
 108
 109     SimdDIBool(const bool b)
 110     {
 111         this->simdInternal_ = svdup_n_u64_x(svptrue_b64(), b ? 0xFFFFFFFFFFFFFFFF : 0);
 112     }
 113
 114     SimdDIBool(svbool_t simd) { this->simdInternal_ = svdup_n_u64_z(simd, 0xFFFFFFFFFFFFFFFF); }
 115
 116     SimdDIBool(svuint64_t simd) : simdInternal_(simd) {}
 117
 118     uint64_t simdInternal_ __attribute__((vector_size(GMX_SIMD_ARM_SVE_LENGTH_VALUE / 8)));
 119 };
 120
 121 static inline SimdDouble gmx_simdcall simdLoad(const double* m, SimdDoubleTag = {})
 122 {
 123     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 124     svbool_t pg = SVE_DOUBLE_MASK;
 125     return { svld1_f64(pg, m) };
 126 }
 127
 128 static inline SimdDouble gmx_simdcall simdLoad(SimdDouble* m, int offset, SimdDoubleTag = {})
 129 {
 130     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 131     svbool_t pg = SVE_DOUBLE_MASK;
 132     return { svld1_f64(pg, reinterpret_cast<double*>(m) + offset * svcntd()) };
 133 }
 134
 135 static inline SimdDouble gmx_simdcall simdLoadDouble(const double* m)
 136 {
 137     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 138     svbool_t pg = SVE_DOUBLE_MASK;
 139     return { svld1_f64(pg, m) };
 140 }
 141
 142 static inline void gmx_simdcall store(double* m, SimdDouble a)
 143 {
 144     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 145     svbool_t pg = SVE_DOUBLE_MASK;
 146     svst1_f64(pg, m, a.simdInternal_);
 147 }
 148
 149 static inline SimdDouble gmx_simdcall simdLoadU(const double* m, SimdDoubleTag = {})
 150 {
 151     svbool_t pg = SVE_DOUBLE_MASK;
 152     return { svld1_f64(pg, m) };
 153 }
 154
 155 static inline void gmx_simdcall storeU(double* m, SimdDouble a)
 156 {
 157     svbool_t pg = SVE_DOUBLE_MASK;
 158     svst1_f64(pg, m, a.simdInternal_);
 159 }
 160
 161 static inline SimdDouble gmx_simdcall setZeroD()
 162 {
 163     return { svdup_f64(0.0) };
 164 }
 165
 166 static inline SimdDInt32 gmx_simdcall simdLoad(const std::int32_t* m, SimdDInt32Tag)
 167 {
 168     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 169     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 170     return { svunpklo_s64(svld1_s32(pg, m)) };
 171 }
 172
 173 static inline void gmx_simdcall store(std::int32_t* m, SimdDInt32 a)
 174 {
 175     assert(0 == (std::size_t(m) % GMX_SIMD_ALIGNMENT));
 176     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 177     svst1_s32(pg, m, svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
 178 }
 179
 180 static inline SimdDInt32 gmx_simdcall simdLoadU(const std::int32_t* m, SimdDInt32Tag)
 181 {
 182     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 183     return { svunpklo_s64(svld1_s32(pg, m)) };
 184 }
 185
 186 static inline void gmx_simdcall storeU(std::int32_t* m, SimdDInt32 a)
 187 {
 188     svbool_t pg = svwhilelt_b32(0, (int32_t)GMX_SIMD_DINT32_WIDTH);
 189     svst1_s32(pg, m, svuzp1(svreinterpret_s32_s64(a.simdInternal_), svreinterpret_s32_s64(a.simdInternal_)));
 190 }
 191
 192 static inline SimdDInt32 gmx_simdcall setZeroDI()
 193 {
 194     return { svdup_s64(0) };
 195 }
 196
 197 template<int index>
 198 gmx_simdcall static inline std::int32_t extract(SimdDInt32 a)
 199 {
 200     svbool_t pg = svwhilelt_b64(0, index);
 201     return svlasta_s64(pg, a.simdInternal_);
 202 }
 203
 204 template<int index>
 205 gmx_simdcall static inline double extract(SimdDouble a)
 206 {
 207     svbool_t pg = svwhilelt_b64(0, index);
 208     return svlasta_f64(pg, a.simdInternal_);
 209 }
 210
 211 static inline SimdDouble gmx_simdcall operator&(SimdDouble a, SimdDouble b)
 212 {
 213     svbool_t pg = svptrue_b64();
 214     return { svreinterpret_f64_s64(svand_s64_x(
 215             pg, svreinterpret_s64_f64(a.simdInternal_), svreinterpret_s64_f64(b.simdInternal_))) };
 216 }
 217
 218 static inline SimdDouble gmx_simdcall andNot(SimdDouble a, SimdDouble b)
 219 {
 220     svbool_t pg = svptrue_b64();
 221     return { svreinterpret_f64_s64(svbic_s64_x(
 222             pg, svreinterpret_s64_f64(b.simdInternal_), svreinterpret_s64_f64(a.simdInternal_))) };
 223 }
 224
 225 static inline SimdDouble gmx_simdcall operator|(SimdDouble a, SimdDouble b)
 226 {
 227     svbool_t pg = svptrue_b64();
 228     return { svreinterpret_f64_s64(svorr_s64_x(
 229             pg, svreinterpret_s64_f64(a.simdInternal_), svreinterpret_s64_f64(b.simdInternal_))) };
 230 }
 231
 232 static inline SimdDouble gmx_simdcall operator^(SimdDouble a, SimdDouble b)
 233 {
 234     svbool_t pg = svptrue_b64();
 235     return { svreinterpret_f64_s64(sveor_s64_x(
 236             pg, svreinterpret_s64_f64(a.simdInternal_), svreinterpret_s64_f64(b.simdInternal_))) };
 237 }
 238
 239 static inline SimdDouble gmx_simdcall operator+(SimdDouble a, SimdDouble b)
 240 {
 241     svbool_t pg = svptrue_b64();
 242     return { svadd_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 243 }
 244
 245 static inline SimdDouble gmx_simdcall operator-(SimdDouble a, SimdDouble b)
 246 {
 247     svbool_t pg = svptrue_b64();
 248     return { svsub_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 249 }
 250
 251 static inline SimdDouble gmx_simdcall operator-(SimdDouble a)
 252 {
 253     svbool_t pg = svptrue_b64();
 254     return { svneg_f64_x(pg, a.simdInternal_) };
 255 }
 256
 257 static inline SimdDouble gmx_simdcall operator*(SimdDouble a, SimdDouble b)
 258 {
 259     svbool_t pg = svptrue_b64();
 260     return { svmul_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 261 }
 262
 263 static inline SimdDouble gmx_simdcall fma(SimdDouble a, SimdDouble b, SimdDouble c)
 264 {
 265     svbool_t pg = svptrue_b64();
 266     return { svmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 267 }
 268
 269 static inline SimdDouble gmx_simdcall fms(SimdDouble a, SimdDouble b, SimdDouble c)
 270 {
 271     svbool_t pg = svptrue_b64();
 272     return { svnmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 273 }
 274
 275 static inline SimdDouble gmx_simdcall fnma(SimdDouble a, SimdDouble b, SimdDouble c)
 276 {
 277     svbool_t pg = svptrue_b64();
 278     return { svmsb_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 279 }
 280
 281 static inline SimdDouble gmx_simdcall fnms(SimdDouble a, SimdDouble b, SimdDouble c)
 282 {
 283     svbool_t pg = svptrue_b64();
 284     return { svnmad_f64_x(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 285 }
 286
 287 static inline SimdDouble gmx_simdcall rsqrt(SimdDouble x)
 288 {
 289     return { svrsqrte_f64(x.simdInternal_) };
 290 }
 291
 292 // The SIMD implementation seems to overflow when we square lu for
 293 // values close to FLOAT_MAX, so we fall back on the version in
 294 // simd_math.h, which is probably slightly slower.
 295 #if GMX_SIMD_HAVE_NATIVE_RSQRT_ITER_DOUBLE
 296 static inline SimdDouble gmx_simdcall rsqrtIter(SimdDouble lu, SimdDouble x)
 297 {
 298     return { vmulq_f64(lu.simdInternal_,
 299                        vrsqrtsq_f32(vmulq_f32(lu.simdInternal_, lu.simdInternal_), x.simdInternal_)) };
 300 }
 301 #endif
 302
 303 static inline SimdDouble gmx_simdcall rcp(SimdDouble x)
 304 {
 305     return { svrecpe_f64(x.simdInternal_) };
 306 }
 307
 308 static inline SimdDouble gmx_simdcall rcpIter(SimdDouble lu, SimdDouble x)
 309 {
 310     svbool_t pg = svptrue_b64();
 311     return { svmul_f64_x(pg, lu.simdInternal_, svrecps_f64(lu.simdInternal_, x.simdInternal_)) };
 312 }
 313
 314 static inline SimdDouble gmx_simdcall maskAdd(SimdDouble a, SimdDouble b, SimdDBool m)
 315 {
 316     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 317     return { svadd_f64_m(pg, a.simdInternal_, b.simdInternal_) };
 318 }
 319
 320 static inline SimdDouble gmx_simdcall maskzMul(SimdDouble a, SimdDouble b, SimdDBool m)
 321 {
 322     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 323     return { svmul_f64_z(pg, a.simdInternal_, b.simdInternal_) };
 324 }
 325
 326 static inline SimdDouble gmx_simdcall maskzFma(SimdDouble a, SimdDouble b, SimdDouble c, SimdDBool m)
 327 {
 328     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 329     return { svmad_f64_z(pg, a.simdInternal_, b.simdInternal_, c.simdInternal_) };
 330 }
 331
 332 static inline SimdDouble gmx_simdcall maskzRsqrt(SimdDouble x, SimdDBool m)
 333 {
 334     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 335     // The result will always be correct since we mask the result with m, but
 336     // for debug builds we also want to make sure not to generate FP exceptions
 337 #ifndef NDEBUG
 338     x.simdInternal_ = svsel_f64(pg, x.simdInternal_, svdup_n_f64(1.0));
 339 #endif
 340     return { svreinterpret_f64_u64(svand_n_u64_z(
 341             pg, svreinterpret_u64_f64(svrsqrte_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
 342 }
 343
 344 static inline SimdDouble gmx_simdcall maskzRcp(SimdDouble x, SimdDBool m)
 345 {
 346     svbool_t pg = svcmpne_n_u64(svptrue_b64(), m.simdInternal_, 0);
 347     // The result will always be correct since we mask the result with m, but
 348     // for debug builds we also want to make sure not to generate FP exceptions
 349 #ifndef NDEBUG
 350     x.simdInternal_ = svsel_f64(m, x.simdInternal_, svdup_n_f64(1.0));
 351 #endif
 352     return { svreinterpret_f64_u64(svand_n_u64_z(
 353             pg, svreinterpret_u64_f64(svrecpe_f64(x.simdInternal_)), 0xFFFFFFFFFFFFFFFF)) };
 354 }
 355
 356 static inline SimdDouble gmx_simdcall abs(SimdDouble x)
 357 {
 358     svbool_t pg = svptrue_b64();
 359     return { svabs_f64_x(pg, x.simdInternal_) };
 360 }
 361
 362 static inline SimdDouble gmx_simdcall max(SimdDouble a, SimdDouble b)
 363 {
 364     svbool_t pg = svptrue_b64();
 365     return { svmax_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 366 }
 367
 368 static inline SimdDouble gmx_simdcall min(SimdDouble a, SimdDouble b)
 369 {
 370     svbool_t pg = svptrue_b64();
 371     return { svmin_f64_x(pg, a.simdInternal_, b.simdInternal_) };
 372 }
 373
 374 // Round and trunc operations are defined at the end of this file, since they
 375 // need to use double-to-integer and integer-to-double conversions.
 376
 377 template<MathOptimization opt = MathOptimization::Safe>
 378 static inline SimdDouble gmx_simdcall frexp(SimdDouble value, SimdDInt32* exponent)
 379 {
 380     svbool_t        pg           = svptrue_b64();
 381     const svint64_t exponentMask = svdup_n_s64(0x7FF0000000000000LL);
 382     const svint64_t mantissaMask = svdup_n_s64(0x800FFFFFFFFFFFFFLL);
 383     const svint64_t exponentBias = svdup_n_s64(1022LL); // add 1 to make our definition identical to frexp()
 384     const svfloat64_t half = svdup_n_f64(0.5);
 385     svint64_t         iExponent;
 386
 387     iExponent = svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), exponentMask);
 388     // iExponent               = svsub_s64_x(pg, svlsr_n_s64_x(pg, iExponent, 52), exponentBias);
 389     iExponent = svsub_s64_x(
 390             pg, svreinterpret_s64_u64(svlsr_n_u64_x(pg, svreinterpret_u64_s64(iExponent), 52)), exponentBias);
 391
 392     exponent->simdInternal_ = iExponent;
 393
 394     return { svreinterpret_f64_s64(
 395             svorr_s64_x(pg,
 396                         svand_s64_x(pg, svreinterpret_s64_f64(value.simdInternal_), mantissaMask),
 397                         svreinterpret_s64_f64(half))) };
 398 }
 399
 400 template<MathOptimization opt = MathOptimization::Safe>
 401 static inline SimdDouble gmx_simdcall ldexp(SimdDouble value, SimdDInt32 exponent)
 402 {
 403     svbool_t        pg           = svptrue_b64();
 404     const svint64_t exponentBias = svdup_n_s64(1023);
 405     svint64_t       iExponent    = svadd_s64_x(pg, exponent.simdInternal_, exponentBias);
 406
 407     if (opt == MathOptimization::Safe)
 408     {
 409         // Make sure biased argument is not negative
 410         iExponent = svmax_n_s64_x(pg, iExponent, 0);
 411     }
 412
 413     iExponent = svlsl_n_s64_x(pg, iExponent, 52);
 414
 415     return { svmul_f64_x(pg, value.simdInternal_, svreinterpret_f64_s64(iExponent)) };
 416 }
 417
 418 static inline double gmx_simdcall reduce(SimdDouble a)
 419 {
 420     svbool_t pg = svptrue_b64();
 421     return svadda_f64(pg, 0.0f, a.simdInternal_);
 422 }
 423
 424 static inline SimdDBool gmx_simdcall operator==(SimdDouble a, SimdDouble b)
 425 {
 426     svbool_t pg = svptrue_b64();
 427     return { svcmpeq_f64(pg, a.simdInternal_, b.simdInternal_) };
 428 }
 429
 430 static inline SimdDBool gmx_simdcall operator!=(SimdDouble a, SimdDouble b)
 431 {
 432     svbool_t pg = svptrue_b64();
 433     return { svcmpne_f64(pg, a.simdInternal_, b.simdInternal_) };
 434 }
 435
 436 static inline SimdDBool gmx_simdcall operator<(SimdDouble a, SimdDouble b)
 437 {
 438     svbool_t pg = svptrue_b64();
 439     return { svcmplt_f64(pg, a.simdInternal_, b.simdInternal_) };
 440 }
 441
 442 static inline SimdDBool gmx_simdcall operator<=(SimdDouble a, SimdDouble b)
 443 {
 444     svbool_t pg = svptrue_b64();
 445     return { svcmple_f64(pg, a.simdInternal_, b.simdInternal_) };
 446 }
 447
 448 static inline SimdDBool gmx_simdcall testBits(SimdDouble a)
 449 {
 450     svbool_t pg = svptrue_b64();
 451     return { svcmpne_n_s64(pg, svreinterpret_s64_f64(a.simdInternal_), 0) };
 452 }
 453
 454 static inline SimdDBool gmx_simdcall operator&&(SimdDBool a, SimdDBool b)
 455 {
 456     svbool_t pg = svptrue_b64();
 457     return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 458 }
 459
 460 static inline SimdDBool gmx_simdcall operator||(SimdDBool a, SimdDBool b)
 461 {
 462     svbool_t pg = svptrue_b64();
 463     return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 464 }
 465
 466 static inline bool gmx_simdcall anyTrue(SimdDBool a)
 467 {
 468     svbool_t pg = svptrue_b64();
 469     return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
 470 }
 471
 472 static inline bool gmx_simdcall extractFirst(SimdDBool a)
 473 {
 474     svbool_t pg = svptrue_b64();
 475     return svptest_first(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
 476 }
 477
 478 static inline SimdDouble gmx_simdcall selectByMask(SimdDouble a, SimdDBool m)
 479 {
 480     svbool_t pg = svptrue_b64();
 481     return { svreinterpret_f64_u64(svand_u64_x(pg, svreinterpret_u64_f64(a.simdInternal_), m.simdInternal_)) };
 482 }
 483
 484 static inline SimdDouble gmx_simdcall selectByNotMask(SimdDouble a, SimdDBool m)
 485 {
 486     svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
 487     return { svsel_f64(pg, a.simdInternal_, svdup_f64(0.0f)) };
 488 }
 489
 490 static inline SimdDouble gmx_simdcall blend(SimdDouble a, SimdDouble b, SimdDBool sel)
 491 {
 492     svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
 493     return { svsel_f64(pg, b.simdInternal_, a.simdInternal_) };
 494 }
 495
 496 static inline SimdDInt32 gmx_simdcall operator&(SimdDInt32 a, SimdDInt32 b)
 497 {
 498     svbool_t pg = svptrue_b64();
 499     return { svand_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 500 }
 501
 502 static inline SimdDInt32 gmx_simdcall andNot(SimdDInt32 a, SimdDInt32 b)
 503 {
 504     svbool_t pg = svptrue_b64();
 505     return { svbic_s64_x(pg, b.simdInternal_, a.simdInternal_) };
 506 }
 507
 508 static inline SimdDInt32 gmx_simdcall operator|(SimdDInt32 a, SimdDInt32 b)
 509 {
 510     svbool_t pg = svptrue_b64();
 511     return { svorr_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 512 }
 513
 514 static inline SimdDInt32 gmx_simdcall operator^(SimdDInt32 a, SimdDInt32 b)
 515 {
 516     svbool_t pg = svptrue_b64();
 517     return { sveor_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 518 }
 519
 520 static inline SimdDInt32 gmx_simdcall operator+(SimdDInt32 a, SimdDInt32 b)
 521 {
 522     svbool_t pg = svptrue_b64();
 523     return { svadd_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 524 }
 525
 526 static inline SimdDInt32 gmx_simdcall operator-(SimdDInt32 a, SimdDInt32 b)
 527 {
 528     svbool_t pg = svptrue_b64();
 529     return { svsub_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 530 }
 531
 532 static inline SimdDInt32 gmx_simdcall operator*(SimdDInt32 a, SimdDInt32 b)
 533 {
 534     svbool_t pg = svptrue_b64();
 535     return { svmul_s64_x(pg, a.simdInternal_, b.simdInternal_) };
 536 }
 537
 538 static inline SimdDIBool gmx_simdcall operator==(SimdDInt32 a, SimdDInt32 b)
 539 {
 540     svbool_t pg = svptrue_b64();
 541     return { svcmpeq_s64(pg, a.simdInternal_, b.simdInternal_) };
 542 }
 543
 544 static inline SimdDIBool gmx_simdcall testBits(SimdDInt32 a)
 545 {
 546     svbool_t pg = svptrue_b64();
 547     return { svcmpne_n_s64(pg, a.simdInternal_, (int64_t)0) };
 548 }
 549
 550 static inline SimdDIBool gmx_simdcall operator<(SimdDInt32 a, SimdDInt32 b)
 551 {
 552     svbool_t pg = svptrue_b64();
 553     return { svcmplt_s64(pg, a.simdInternal_, b.simdInternal_) };
 554 }
 555
 556 static inline SimdDIBool gmx_simdcall operator&&(SimdDIBool a, SimdDIBool b)
 557 {
 558     svbool_t pg = svptrue_b64();
 559     return { svand_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 560 }
 561
 562 static inline SimdDIBool gmx_simdcall operator||(SimdDIBool a, SimdDIBool b)
 563 {
 564     svbool_t pg = svptrue_b64();
 565     return { svorr_u64_x(pg, a.simdInternal_, b.simdInternal_) };
 566 }
 567
 568 static inline bool gmx_simdcall anyTrue(SimdDIBool a)
 569 {
 570     svbool_t pg = svptrue_b64();
 571     return svptest_any(pg, svcmpne_n_u64(pg, a.simdInternal_, 0));
 572 }
 573
 574 static inline SimdDInt32 gmx_simdcall selectByMask(SimdDInt32 a, SimdDIBool m)
 575 {
 576     svbool_t pg = svptrue_b64();
 577     return { svand_s64_x(pg, a.simdInternal_, svreinterpret_s64_u64(m.simdInternal_)) };
 578 }
 579
 580 static inline SimdDInt32 gmx_simdcall selectByNotMask(SimdDInt32 a, SimdDIBool m)
 581 {
 582     svbool_t pg = svcmpeq_n_u64(svptrue_b64(), m.simdInternal_, 0);
 583     return { svadd_n_s64_z(pg, a.simdInternal_, 0) };
 584 }
 585
 586 static inline SimdDInt32 gmx_simdcall blend(SimdDInt32 a, SimdDInt32 b, SimdDIBool sel)
 587 {
 588     svbool_t pg = svcmpne_n_u64(svptrue_b64(), sel.simdInternal_, 0);
 589     return { svsel_s64(pg, b.simdInternal_, a.simdInternal_) };
 590 }
 591
 592 static inline SimdDInt32 gmx_simdcall cvtR2I(SimdDouble a)
 593 {
 594     svbool_t pg = svptrue_b64();
 595     return { svcvt_s64_x(pg, svrinta_f64_x(pg, a.simdInternal_)) };
 596 }
 597
 598 static inline SimdDInt32 gmx_simdcall cvttR2I(SimdDouble a)
 599 {
 600     // FIXME ???
 601     svbool_t pg = svptrue_b64();
 602     return { svcvt_s64_x(pg, a.simdInternal_) };
 603 }
 604
 605 static inline SimdDouble gmx_simdcall cvtI2R(SimdDInt32 a)
 606 {
 607     svbool_t pg = svptrue_b64();
 608     return { svcvt_f64_x(pg, a.simdInternal_) };
 609 }
 610
 611 static inline SimdDIBool gmx_simdcall cvtB2IB(SimdDBool a)
 612 {
 613     return { a.simdInternal_ };
 614 }
 615
 616 static inline SimdDBool gmx_simdcall cvtIB2B(SimdDIBool a)
 617 {
 618     return { a.simdInternal_ };
 619 }
 620
 621 static inline SimdDouble gmx_simdcall round(SimdDouble x)
 622 {
 623     svbool_t pg = svptrue_b64();
 624     return { svrinta_f64_x(pg, x.simdInternal_) };
 625 }
 626
 627 static inline SimdDouble gmx_simdcall trunc(SimdDouble x)
 628 {
 629     return cvtI2R(cvttR2I(x));
 630 }
 631
 632 static inline void gmx_simdcall cvtF2DD(SimdFloat gmx_unused f,
 633                                         SimdDouble gmx_unused* d0,
 634                                         SimdDouble gmx_unused* d1)
 635 {
 636     assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
 637     svbool_t pg       = svptrue_b32();
 638     d0->simdInternal_ = svcvt_f64_f32_x(pg, svzip1(f.simdInternal_, f.simdInternal_));
 639     d1->simdInternal_ = svcvt_f64_f32_x(pg, svzip2(f.simdInternal_, f.simdInternal_));
 640 }
 641
 642 static inline SimdFloat gmx_simdcall cvtDD2F(SimdDouble gmx_unused d0, SimdDouble gmx_unused d1)
 643 {
 644     svbool_t pg = svptrue_b64();
 645     assert(GMX_SIMD_FLOAT_WIDTH == 2 * GMX_SIMD_DOUBLE_WIDTH);
 646     return { svuzp1_f32(svcvt_f32_f64_x(pg, d0.simdInternal_), svcvt_f32_f64_x(pg, d1.simdInternal_)) };
 647 }
 648
 649 } // namespace gmx
 650
 651 #endif // GMX_SIMD_IMPL_ARM_SVE_SIMD_DOUBLE_H