src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_simd_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_DOUBLE_H
  37 #define GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_DOUBLE_H
  38
  39 /* Fujitsu header borrows the name from SSE2, since some instructions have aliases.
  40  * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is
  41  * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in
  42  * turn causes all intrinsics to be declared inline _instead_ of static. This
  43  * leads to duplicate symbol errors at link time.
  44  * To work around this we unset this before including the HPC-ACE header, and
  45  * reset the value afterwards.
  46  */
  47 #ifdef _ISOC99_SOURCE
  48 #    undef _ISOC99_SOURCE
  49 #    define SAVE_ISOC99_SOURCE
  50 #endif
  51
  52 #include <emmintrin.h>
  53
  54 #ifdef SAVE_ISOC99_SOURCE
  55 #    define _ISOC99_SOURCE
  56 #    undef SAVE_ISOC99_SOURCE
  57 #endif
  58
  59 #include <cmath>
  60 #include <cstdint>
  61
  62 #include "impl_sparc64_hpc_ace_common.h"
  63
  64 /****************************************************
  65  *      DOUBLE PRECISION SIMD IMPLEMENTATION        *
  66  ****************************************************/
  67 #define SimdDouble _fjsp_v2r8
  68 #define simdLoadD _fjsp_load_v2r8
  69 #define simdLoad1D(m) _fjsp_set_v2r8((*m), (*m))
  70 #define simdSet1D(a) _fjsp_set_v2r8(a, a)
  71 #define simdStoreD _fjsp_store_v2r8
  72 #define simdLoadUD simdLoadD
  73 /* No unaligned store of SimdDouble */
  74 #define simdSetZeroD _fjsp_setzero_v2r8
  75 #define simdAddD _fjsp_add_v2r8
  76 #define simdSubD _fjsp_sub_v2r8
  77 #define simdMulD _fjsp_mul_v2r8
  78 #define simdFmaddD(a, b, c) _fjsp_madd_v2r8(a, b, c)
  79 #define simdFmsubD(a, b, c) _fjsp_msub_v2r8(a, b, c)
  80 #define simdFnmaddD(a, b, c) _fjsp_nmsub_v2r8(a, b, c)
  81 #define simdFnmsubD(a, b, c) _fjsp_nmadd_v2r8(a, b, c)
  82 #define simdAndD _fjsp_and_v2r8
  83 #define simdAndNotD _fjsp_andnot1_v2r8
  84 #define simdOrD _fjsp_or_v2r8
  85 #define simdXorD _fjsp_xor_v2r8
  86 #define simdRsqrtD(x) _fjsp_rsqrta_v2r8(x)
  87 #define simdRcpD(x) _fjsp_rcpa_v2r8(x)
  88 #define simdAbsD(x) _fjsp_abs_v2r8(x)
  89 #define simdNegD(x) _fjsp_neg_v2r8(x)
  90 #define simdMaxD _fjsp_max_v2r8
  91 #define simdMinD _fjsp_min_v2r8
  92 #define simdRoundD(x) simdCvtI2D(simdCvtD2I(x))
  93 #define simdTruncD(x) simdCvtI2D(simdCvttD2I(x))
  94 #define simdFractionD(x) simdSubD(x, simdTruncD(x))
  95 #define simdGetExponentD simdGetExponentD_sparc64_hpc_ace
  96 #define simdGetMantissaD simdGetMantissaD_sparc64_hpc_ace
  97 #define simdSetExponentD simdSetExponentD_sparc64_hpc_ace
  98 /* integer datatype corresponding to double: SimdDInt32 */
  99 #define SimdDInt32 _fjsp_v2r8
 100 #define simdLoadDI(m) simdLoadDI_sparc64_hpc_ace(m)
 101 #define simdSet1DI(i) simdSet1DI_sparc64_hpc_ace(i)
 102 #define simdStoreDI(m, x) simdStoreDI_sparc64_hpc_ace(m, x)
 103 #define simdLoadUDI simdLoadDI
 104 /* No unaligned store of SimdDInt32 */
 105 #define simdSetZeroDI _fjsp_setzero_v2r8
 106 #define simdCvtD2I simdCvtD2I_sparc64_hpc_ace
 107 #define simdCvttD2I _fjsp_dtox_v2r8
 108 #define simdCvtI2D _fjsp_xtod_v2r8
 109 #define simdExtractDI simdExtractDI_sparc64_hpc_ace
 110 /* Integer logical ops on SimdDInt32 */
 111 #define simdSlliDI simdSlliDI_sparc64_hpc_ace
 112 #define simdSrliDI simdSrliDI_sparc64_hpc_ace
 113 #define simdAndDI _fjsp_and_v2r8
 114 #define simdAndNotDI _fjsp_andnot1_v2r8
 115 #define simdOrDI _fjsp_or_v2r8
 116 #define simdXorDI _fjsp_xor_v2r8
 117 /* Integer arithmetic ops on integer datatype corresponding to double */
 118 /* Boolean & comparison operations on SimdDouble */
 119 #define SimdDBool _fjsp_v2r8
 120 #define simdCmpEqD _fjsp_cmpeq_v2r8
 121 #define simdCmpLtD _fjsp_cmplt_v2r8
 122 #define simdCmpLeD _fjsp_cmple_v2r8
 123 #define simdAndDB _fjsp_and_v2r8
 124 #define simdOrDB _fjsp_or_v2r8
 125 #define simdAnyTrueDB gmx_simd_anytrue_d_sparc64_hpc_ace
 126 #define simdMaskD _fjsp_and_v2r8
 127 #define simdMaskNotD(a, sel) _fjsp_andnot1_v2r8(sel, a)
 128 #define simdBlendD(a, b, sel) _fjsp_selmov_v2r8(b, a, sel)
 129 #define simdReduceD(a) simdReduceD_sparc64_hpc_ace(a)
 130
 131 /* No boolean & comparison operations on SimdDInt32 */
 132 /* Float/double conversion */
 133 #define simdCvtF2D(f) (f)
 134 #define simdCvtD2F(d) (d)
 135
 136
 137 /****************************************************
 138  * DOUBLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
 139  ****************************************************/
 140 static inline SimdDInt32 simdLoadDI_sparc64_hpc_ace(const int* m)
 141 {
 142     union {
 143         _fjsp_v2r8    simd;
 144         long long int i[2];
 145     } conv;
 146
 147     conv.i[0] = m[0];
 148     conv.i[1] = m[1];
 149
 150     return _fjsp_load_v2r8((double*)&(conv.simd));
 151 }
 152
 153 static inline void simdStoreDI_sparc64_hpc_ace(int* m, SimdDInt32 x)
 154 {
 155     union {
 156         _fjsp_v2r8    simd;
 157         long long int i[2];
 158     } conv;
 159
 160     _fjsp_store_v2r8((double*)&(conv.simd), x);
 161
 162     m[0] = conv.i[0];
 163     m[1] = conv.i[1];
 164 }
 165
 166 static inline SimdDInt32 simdSet1DI_sparc64_hpc_ace(int i)
 167 {
 168     union {
 169         _fjsp_v2r8    simd;
 170         long long int i[2];
 171     } conv;
 172
 173     conv.i[0] = i;
 174     conv.i[1] = i;
 175
 176     return _fjsp_load_v2r8((double*)&(conv.simd));
 177 }
 178
 179 static inline int simdExtractDI_sparc64_hpc_ace(SimdDInt32 x, int i)
 180 {
 181     long long int res;
 182     /* This conditional should be optimized away at compile time */
 183     if (i == 0)
 184     {
 185         _fjsp_storel_v2r8((double*)&res, x);
 186     }
 187     else
 188     {
 189         _fjsp_storeh_v2r8((double*)&res, x);
 190     }
 191     return (int)res;
 192 }
 193
 194 static inline SimdDInt32 simdSlliDI_sparc64_hpc_ace(SimdDInt32 x, int i)
 195 {
 196     _fjsp_v2i8 ix = *((_fjsp_v2i8*)&x);
 197     ix            = _fjsp_slli_v2i8(ix, i);
 198     x             = *((_fjsp_v2r8*)&ix);
 199     return x;
 200 }
 201
 202 static inline SimdDInt32 simdSrliDI_sparc64_hpc_ace(SimdDInt32 x, int i)
 203 {
 204     _fjsp_v2i8 ix = *((_fjsp_v2i8*)&x);
 205     ix            = _fjsp_srli_v2i8(ix, i);
 206     x             = *((_fjsp_v2r8*)&ix);
 207     return x;
 208 }
 209
 210 static inline SimdDInt32 simdCvtD2I_sparc64_hpc_ace(SimdDouble x)
 211 {
 212     _fjsp_v2r8 signbit = _fjsp_set_v2r8(-0.0, -0.0);
 213     _fjsp_v2r8 half    = _fjsp_set_v2r8(0.5, 0.5);
 214
 215     x = _fjsp_add_v2r8(x, _fjsp_or_v2r8(_fjsp_and_v2r8(signbit, x), half));
 216     return _fjsp_dtox_v2r8(x);
 217 }
 218
 219 static inline int gmx_simd_anytrue_d_sparc64_hpc_ace(SimdDBool x)
 220 {
 221     long long int i;
 222     x = _fjsp_or_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
 223     _fjsp_storel_v2r8((double*)&i, x);
 224     return (i != 0LL);
 225 }
 226
 227 static inline double simdReduceD_sparc64_hpc_ace(SimdDouble x)
 228 {
 229     double d;
 230     x = _fjsp_add_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
 231     _fjsp_storel_v2r8(&d, x);
 232     return d;
 233 }
 234
 235
 236 static inline SimdDouble simdGetExponentD_sparc64_hpc_ace(SimdDouble x)
 237 {
 238     /* HPC-ACE cannot cast _fjsp_v2r8 to _fjsp_v4i4, so to perform shifts we
 239      * would need to store and reload. Since we are only operating on two
 240      * numbers it is likely more efficient to do the operations directly on
 241      * normal registers.
 242      */
 243     const std::int64_t expmask = 0x7ff0000000000000LL;
 244     const std::int64_t expbias = 1023LL;
 245
 246     union {
 247         _fjsp_v2r8    simd;
 248         long long int i[2];
 249     } conv;
 250
 251     _fjsp_store_v2r8((double*)&conv.simd, x);
 252     conv.i[0] = ((conv.i[0] & expmask) >> 52) - expbias;
 253     conv.i[1] = ((conv.i[1] & expmask) >> 52) - expbias;
 254     x         = _fjsp_load_v2r8((double*)&conv.simd);
 255     return _fjsp_xtod_v2r8(x);
 256 }
 257
 258 static inline SimdDouble simdGetMantissaD_sparc64_hpc_ace(SimdDouble x)
 259 {
 260     std::int64_t mantmask[2] = { 0x000fffffffffffffLL, 0x000fffffffffffffLL };
 261     SimdDouble   one         = _fjsp_set_v2r8(1.0, 1.0);
 262
 263     x = _fjsp_and_v2r8(x, _fjsp_load_v2r8((double*)mantmask));
 264     return _fjsp_or_v2r8(x, one);
 265 }
 266
 267 static inline SimdDouble simdSetExponentD_sparc64_hpc_ace(SimdDouble x)
 268 {
 269     const std::int64_t expbias = 1023;
 270     union {
 271         _fjsp_v2r8    simd;
 272         long long int i[2];
 273     } conv;
 274
 275
 276     _fjsp_store_v2r8((double*)&conv.simd, simdCvtD2I_sparc64_hpc_ace(x));
 277     conv.i[0] = (conv.i[0] + expbias) << 52;
 278     conv.i[1] = (conv.i[1] + expbias) << 52;
 279
 280     return _fjsp_load_v2r8((double*)&conv.simd);
 281 }
 282
 283 #endif /* GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_DOUBLE_H */