2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_DOUBLE_H
37 #define GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_DOUBLE_H
39 /* Fujitsu header borrows the name from SSE2, since some instructions have aliases.
40 * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is
41 * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in
42 * turn causes all intrinsics to be declared inline _instead_ of static. This
43 * leads to duplicate symbol errors at link time.
44 * To work around this we unset this before including the HPC-ACE header, and
45 * reset the value afterwards.
48 # undef _ISOC99_SOURCE
49 # define SAVE_ISOC99_SOURCE
52 #include <emmintrin.h>
54 #ifdef SAVE_ISOC99_SOURCE
55 # define _ISOC99_SOURCE
56 # undef SAVE_ISOC99_SOURCE
62 #include "impl_sparc64_hpc_ace_common.h"
64 /****************************************************
65 * DOUBLE PRECISION SIMD IMPLEMENTATION *
66 ****************************************************/
/* Double-precision SIMD type for this backend (the _fjsp_v2r8 vector type). */
67 #define SimdDouble _fjsp_v2r8
/* Load a SimdDouble from memory; simdLoadUD is mapped onto this same macro. */
68 #define simdLoadD _fjsp_load_v2r8
/* Broadcast the double that m points to into both elements of the vector.
 * The parameter is parenthesized before it is dereferenced, so pointer
 * expressions such as simdLoad1D(p + 1) read p[1] instead of (*p) + 1.
 * The argument is still expanded twice: do not pass expressions with
 * side effects (e.g. p++).
 */
#define simdLoad1D(m) _fjsp_set_v2r8((*(m)), (*(m)))
/* Broadcast a scalar into both elements; the argument is expanded twice. */
70 #define simdSet1D(a) _fjsp_set_v2r8(a, a)
71 #define simdStoreD _fjsp_store_v2r8
/* The "unaligned" load is an alias for the regular load macro. */
72 #define simdLoadUD simdLoadD
73 /* No unaligned store of SimdDouble */
74 #define simdSetZeroD _fjsp_setzero_v2r8
/* Basic arithmetic maps directly onto the corresponding intrinsics. */
75 #define simdAddD _fjsp_add_v2r8
76 #define simdSubD _fjsp_sub_v2r8
77 #define simdMulD _fjsp_mul_v2r8
/* Fused multiply-add family. Note the cross-mapping of the negated forms:
 * simdFnmaddD uses the _nmsub_ intrinsic and simdFnmsubD uses the _nmadd_
 * intrinsic.
 * NOTE(review): presumably the Fujitsu intrinsics negate the whole
 * expression rather than only the product - confirm against the HPC-ACE
 * intrinsics documentation before changing either line.
 */
78 #define simdFmaddD(a, b, c) _fjsp_madd_v2r8(a, b, c)
79 #define simdFmsubD(a, b, c) _fjsp_msub_v2r8(a, b, c)
80 #define simdFnmaddD(a, b, c) _fjsp_nmsub_v2r8(a, b, c)
81 #define simdFnmsubD(a, b, c) _fjsp_nmadd_v2r8(a, b, c)
/* Bitwise logic on SimdDouble. */
82 #define simdAndD _fjsp_and_v2r8
83 #define simdAndNotD _fjsp_andnot1_v2r8
84 #define simdOrD _fjsp_or_v2r8
85 #define simdXorD _fjsp_xor_v2r8
/* Reciprocal square root and reciprocal.
 * NOTE(review): the "a" suffix of the intrinsic names suggests these are
 * approximations - confirm the delivered precision.
 */
86 #define simdRsqrtD(x) _fjsp_rsqrta_v2r8(x)
87 #define simdRcpD(x) _fjsp_rcpa_v2r8(x)
88 #define simdAbsD(x) _fjsp_abs_v2r8(x)
89 #define simdNegD(x) _fjsp_neg_v2r8(x)
90 #define simdMaxD _fjsp_max_v2r8
91 #define simdMinD _fjsp_min_v2r8
/* Rounding helpers are composed from the double<->integer conversions
 * defined further down: simdRoundD goes through simdCvtD2I, simdTruncD
 * through simdCvttD2I. simdFractionD expands its argument twice.
 */
92 #define simdRoundD(x) simdCvtI2D(simdCvtD2I(x))
93 #define simdTruncD(x) simdCvtI2D(simdCvttD2I(x))
94 #define simdFractionD(x) simdSubD(x, simdTruncD(x))
/* Exponent/mantissa manipulation is forwarded to the inline helper
 * functions with the _sparc64_hpc_ace suffix defined later in this file.
 */
95 #define simdGetExponentD simdGetExponentD_sparc64_hpc_ace
96 #define simdGetMantissaD simdGetMantissaD_sparc64_hpc_ace
97 #define simdSetExponentD simdSetExponentD_sparc64_hpc_ace
98 /* integer datatype corresponding to double: SimdDInt32 */
/* SimdDInt32 uses the same underlying vector type as SimdDouble. */
99 #define SimdDInt32 _fjsp_v2r8
/* Load, broadcast and store of integers go through inline helper functions. */
100 #define simdLoadDI(m) simdLoadDI_sparc64_hpc_ace(m)
101 #define simdSet1DI(i) simdSet1DI_sparc64_hpc_ace(i)
102 #define simdStoreDI(m, x) simdStoreDI_sparc64_hpc_ace(m, x)
/* The "unaligned" integer load is an alias for the regular integer load. */
103 #define simdLoadUDI simdLoadDI
104 /* No unaligned store of SimdDInt32 */
105 #define simdSetZeroDI _fjsp_setzero_v2r8
/* Conversions: simdCvttD2I and simdCvtI2D map straight onto intrinsics,
 * while simdCvtD2I uses a helper that adds a sign-matched 0.5 before
 * calling the same intrinsic that simdCvttD2I maps to.
 */
106 #define simdCvtD2I simdCvtD2I_sparc64_hpc_ace
107 #define simdCvttD2I _fjsp_dtox_v2r8
108 #define simdCvtI2D _fjsp_xtod_v2r8
109 #define simdExtractDI simdExtractDI_sparc64_hpc_ace
110 /* Integer logical ops on SimdDInt32 */
/* Shifts use helper functions; the bitwise operations reuse the same
 * intrinsics as the SimdDouble logic macros.
 */
111 #define simdSlliDI simdSlliDI_sparc64_hpc_ace
112 #define simdSrliDI simdSrliDI_sparc64_hpc_ace
113 #define simdAndDI _fjsp_and_v2r8
114 #define simdAndNotDI _fjsp_andnot1_v2r8
115 #define simdOrDI _fjsp_or_v2r8
116 #define simdXorDI _fjsp_xor_v2r8
117 /* Integer arithmetic ops on integer datatype corresponding to double (none are defined here) */
118 /* Boolean & comparison operations on SimdDouble */
/* Booleans are stored in the same vector type as the data they mask, so
 * the boolean and masking operations below are plain bitwise intrinsics.
 */
119 #define SimdDBool _fjsp_v2r8
120 #define simdCmpEqD _fjsp_cmpeq_v2r8
121 #define simdCmpLtD _fjsp_cmplt_v2r8
122 #define simdCmpLeD _fjsp_cmple_v2r8
123 #define simdAndDB _fjsp_and_v2r8
124 #define simdOrDB _fjsp_or_v2r8
125 #define simdAnyTrueDB gmx_simd_anytrue_d_sparc64_hpc_ace
126 #define simdMaskD _fjsp_and_v2r8
/* Argument order is swapped on purpose of the mapping: the selector is
 * passed first to _fjsp_andnot1_v2r8, and simdBlendD passes (b, a, sel)
 * to _fjsp_selmov_v2r8.
 * NOTE(review): confirm operand semantics of andnot1/selmov against the
 * HPC-ACE intrinsics documentation before reordering.
 */
127 #define simdMaskNotD(a, sel) _fjsp_andnot1_v2r8(sel, a)
128 #define simdBlendD(a, b, sel) _fjsp_selmov_v2r8(b, a, sel)
/* Reduction of a SimdDouble to a scalar double is done by a helper function. */
129 #define simdReduceD(a) simdReduceD_sparc64_hpc_ace(a)
131 /* No boolean & comparison operations on SimdDInt32 */
132 /* Float/double conversion */
/* Both conversions expand to their argument unchanged.
 * NOTE(review): presumably the single- and double-precision SIMD types
 * share one representation on this platform - confirm against the float
 * implementation header.
 */
133 #define simdCvtF2D(f) (f)
134 #define simdCvtD2F(d) (d)
137 /****************************************************
138 * DOUBLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
139 ****************************************************/
// Backend of simdLoadDI: produces a SimdDInt32 from the integers at m.
// The result is obtained by loading the `simd` member of a local
// conversion object `conv` with _fjsp_load_v2r8.
// NOTE(review): presumably m[0] and m[1] are copied into conv before the
// load - confirm the lane layout and how many ints are read from m.
140 static inline SimdDInt32 simdLoadDI_sparc64_hpc_ace(const int* m)
150 return _fjsp_load_v2r8((double*)&(conv.simd));
// Backend of simdStoreDI: writes the integer vector x out through m.
// The vector is first stored into the `simd` member of a local conversion
// object `conv` with _fjsp_store_v2r8.
// NOTE(review): presumably the integers are then copied from conv into
// m[0] and m[1] - confirm which parts of each lane are written.
153 static inline void simdStoreDI_sparc64_hpc_ace(int* m, SimdDInt32 x)
160 _fjsp_store_v2r8((double*)&(conv.simd), x);
// Backend of simdSet1DI: returns a SimdDInt32 loaded from the `simd`
// member of a local conversion object `conv`.
// NOTE(review): presumably both lanes of conv are set to i before the
// load - confirm.
166 static inline SimdDInt32 simdSet1DI_sparc64_hpc_ace(int i)
176 return _fjsp_load_v2r8((double*)&(conv.simd));
// Backend of simdExtractDI: returns one integer element of x.
// Either the low half (_fjsp_storel_v2r8) or the high half
// (_fjsp_storeh_v2r8) of x is written into the local `res`.
// NOTE(review): the selection is presumably made on i (0 = low,
// otherwise high) - confirm, and confirm how res is narrowed to int.
179 static inline int simdExtractDI_sparc64_hpc_ace(SimdDInt32 x, int i)
182 /* This conditional should be optimized away at compile time */
185 _fjsp_storel_v2r8((double*)&res, x);
189 _fjsp_storeh_v2r8((double*)&res, x);
// Backend of simdSlliDI: shifts the integer lanes of x left by i bits.
// x is reinterpreted as _fjsp_v2i8 through a pointer cast, shifted with
// _fjsp_slli_v2i8, and the result is reinterpreted back into x.
// NOTE(review): presumably x is then returned - confirm.
194 static inline SimdDInt32 simdSlliDI_sparc64_hpc_ace(SimdDInt32 x, int i)
196 _fjsp_v2i8 ix = *((_fjsp_v2i8*)&x);
197 ix = _fjsp_slli_v2i8(ix, i);
198 x = *((_fjsp_v2r8*)&ix);
// Backend of simdSrliDI: shifts the integer lanes of x right by i bits.
// x is reinterpreted as _fjsp_v2i8 through a pointer cast, shifted with
// _fjsp_srli_v2i8, and the result is reinterpreted back into x.
// NOTE(review): presumably x is then returned - confirm.
202 static inline SimdDInt32 simdSrliDI_sparc64_hpc_ace(SimdDInt32 x, int i)
204 _fjsp_v2i8 ix = *((_fjsp_v2i8*)&x);
205 ix = _fjsp_srli_v2i8(ix, i);
206 x = *((_fjsp_v2r8*)&ix);
// Backend of simdCvtD2I: converts doubles to integers with rounding.
// The sign bit of each element is extracted by ANDing with -0.0 and ORed
// onto 0.5, giving +0.5 or -0.5 with the sign of x. That value is added to
// x before converting with _fjsp_dtox_v2r8, the same intrinsic that
// simdCvttD2I maps to.
210 static inline SimdDInt32 simdCvtD2I_sparc64_hpc_ace(SimdDouble x)
212 _fjsp_v2r8 signbit = _fjsp_set_v2r8(-0.0, -0.0);
213 _fjsp_v2r8 half = _fjsp_set_v2r8(0.5, 0.5);
215 x = _fjsp_add_v2r8(x, _fjsp_or_v2r8(_fjsp_and_v2r8(signbit, x), half));
216 return _fjsp_dtox_v2r8(x);
// Backend of simdAnyTrueDB: tests whether any element of the boolean
// vector x is set. The high element is ORed into the low element and the
// low element is then stored into the local `i`.
// NOTE(review): presumably the function returns whether i is non-zero -
// confirm the type of i and the return expression.
219 static inline int gmx_simd_anytrue_d_sparc64_hpc_ace(SimdDBool x)
222 x = _fjsp_or_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
223 _fjsp_storel_v2r8((double*)&i, x);
// Backend of simdReduceD: sums the elements of x into one scalar.
// The high element is added onto the low element and the low element is
// stored into the local double `d`.
// NOTE(review): presumably d is the return value - confirm.
227 static inline double simdReduceD_sparc64_hpc_ace(SimdDouble x)
230 x = _fjsp_add_v2r8(x, _fjsp_unpackhi_v2r8(x, x));
231 _fjsp_storel_v2r8(&d, x);
// Backend of simdGetExponentD: returns the unbiased binary exponent of
// each element of x as a double.
// x is stored into the conversion object `conv` and each 64-bit integer
// conv.i[k] is reduced to its exponent field: masked with expmask
// (0x7ff0000000000000), shifted right by 52 bits, and decreased by the
// bias 1023. The integers are reloaded as a vector and converted to
// double with _fjsp_xtod_v2r8.
236 static inline SimdDouble simdGetExponentD_sparc64_hpc_ace(SimdDouble x)
238 /* HPC-ACE cannot cast _fjsp_v2r8 to _fjsp_v4i4, so to perform shifts we
239 * would need to store and reload. Since we are only operating on two
240 * numbers it is likely more efficient to do the operations directly on
243 const std::int64_t expmask = 0x7ff0000000000000LL;
244 const std::int64_t expbias = 1023LL;
251 _fjsp_store_v2r8((double*)&conv.simd, x);
252 conv.i[0] = ((conv.i[0] & expmask) >> 52) - expbias;
253 conv.i[1] = ((conv.i[1] & expmask) >> 52) - expbias;
254 x = _fjsp_load_v2r8((double*)&conv.simd);
255 return _fjsp_xtod_v2r8(x);
// Backend of simdGetMantissaD: keeps the low 52 bits of each element of x
// (mask 0x000fffffffffffff) and ORs in the bit pattern of 1.0, so the
// sign and exponent bits of x are replaced by those of 1.0.
// The mask is built as a local array of two 64-bit integers and loaded
// as a vector through a pointer cast.
258 static inline SimdDouble simdGetMantissaD_sparc64_hpc_ace(SimdDouble x)
260 std::int64_t mantmask[2] = { 0x000fffffffffffffLL, 0x000fffffffffffffLL };
261 SimdDouble one = _fjsp_set_v2r8(1.0, 1.0);
263 x = _fjsp_and_v2r8(x, _fjsp_load_v2r8((double*)mantmask));
264 return _fjsp_or_v2r8(x, one);
// Backend of simdSetExponentD: builds doubles whose bit pattern holds only
// an exponent field derived from x.
// x is converted to integers with simdCvtD2I_sparc64_hpc_ace and stored
// into the conversion object `conv`. Each integer conv.i[k] gets the bias
// 1023 added and is shifted left by 52 bits; the result is reloaded as a
// vector. For n + 1023 inside the normal exponent range this is the bit
// pattern of 2^n.
// NOTE(review): no range check on the converted integer is visible here.
267 static inline SimdDouble simdSetExponentD_sparc64_hpc_ace(SimdDouble x)
269 const std::int64_t expbias = 1023;
276 _fjsp_store_v2r8((double*)&conv.simd, simdCvtD2I_sparc64_hpc_ace(x));
277 conv.i[0] = (conv.i[0] + expbias) << 52;
278 conv.i[1] = (conv.i[1] + expbias) << 52;
280 return _fjsp_load_v2r8((double*)&conv.simd);
283 #endif /* GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_DOUBLE_H */