2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_QPX_H
37 #define GMX_SIMD_IMPLEMENTATION_IBM_QPX_H
46 /* IBM QPX SIMD instruction wrappers
48 * Please see documentation in gromacs/simd/simd.h for the available
51 /* Capability definitions for IBM QPX */
/* Capability switches: a GMX_SIMD_HAVE_* / GMX_SIMD4_HAVE_* name is defined
 * for every supported feature and explicitly undefined for every
 * unsupported one.
 */
52 #define GMX_SIMD_HAVE_FLOAT
53 #define GMX_SIMD_HAVE_DOUBLE
54 #define GMX_SIMD_HAVE_HARDWARE
/* No unaligned load or store wrappers are defined in this file, so both
 * LOADU and STOREU have to be withdrawn.
 */
55 #undef GMX_SIMD_HAVE_LOADU
56 #undef GMX_SIMD_HAVE_STOREU
57 #undef GMX_SIMD_HAVE_LOGICAL
58 #define GMX_SIMD_HAVE_FMA
59 #undef GMX_SIMD_HAVE_FRACTION
60 #define GMX_SIMD_HAVE_FINT32
61 #undef GMX_SIMD_HAVE_FINT32_EXTRACT
62 #undef GMX_SIMD_HAVE_FINT32_LOGICAL
63 #undef GMX_SIMD_HAVE_FINT32_ARITHMETICS
64 #define GMX_SIMD_HAVE_DINT32
65 #undef GMX_SIMD_HAVE_DINT32_EXTRACT
66 #undef GMX_SIMD_HAVE_DINT32_LOGICAL
67 #undef GMX_SIMD_HAVE_DINT32_ARITHMETICS
68 #define GMX_SIMD4_HAVE_FLOAT
69 #define GMX_SIMD4_HAVE_DOUBLE
71 /* Implementation details */
/* The float, double and both int32 vector flavours are all declared 4 wide. */
72 #define GMX_SIMD_FLOAT_WIDTH 4
73 #define GMX_SIMD_DOUBLE_WIDTH 4
74 #define GMX_SIMD_FINT32_WIDTH 4
75 #define GMX_SIMD_DINT32_WIDTH 4
/* NOTE(review): presumably the number of accurate bits delivered by the
 * reciprocal-square-root (vec_rsqrte) and reciprocal (vec_re) estimates that
 * back gmx_simd_rsqrt_* and gmx_simd_rcp_* - confirm against simd.h.
 */
76 #define GMX_SIMD_RSQRT_BITS 14
77 #define GMX_SIMD_RCP_BITS 14
79 /****************************************************
80 * SINGLE PRECISION SIMD IMPLEMENTATION *
81 ****************************************************/
/* Single-precision wrappers. Each gmx_simd_*_f name expands either to a QPX
 * vec_* intrinsic or to one of the *_ibm_qpx helper functions of this file;
 * the float vector, its int32 companion and its boolean type are all
 * vector4double.
 */
82 #define gmx_simd_float_t vector4double
/* NOTE(review): two definitions of gmx_simd_load_f/gmx_simd_store_f are
 * visible (vec_ld/vec_st, then vec_lda/vec_sta). The embedded numbering skips
 * lines around them, so they are presumably the two branches of a
 * preprocessor conditional that is not shown here - confirm.
 */
84 # define gmx_simd_load_f(m) vec_ld(0, (float *)(m))
85 # define gmx_simd_store_f(m, a) vec_st(a, 0, (float *)(m))
87 # define gmx_simd_load_f(m) vec_lda(0, (float *)(m))
88 # define gmx_simd_store_f(m, a) vec_sta(a, 0, (float *)(m))
90 # define gmx_simd_load1_f(m) vec_lds(0, (float *)(m))
91 #define gmx_simd_set1_f(x) vec_splats(x)
92 /* No support for unaligned load/store */
93 #define gmx_simd_setzero_f gmx_simd_setzero_ibm_qpx
94 #define gmx_simd_add_f(a, b) vec_add(a, b)
95 #define gmx_simd_sub_f(a, b) vec_sub(a, b)
96 #define gmx_simd_mul_f(a, b) vec_mul(a, b)
97 #define gmx_simd_fmadd_f(a, b, c) vec_madd(a, b, c)
98 #define gmx_simd_fmsub_f(a, b, c) vec_msub(a, b, c)
99 /* IBM uses an alternative FMA definition, so -a*b+c=-(a*b-c) is "nmsub" */
100 #define gmx_simd_fnmadd_f(a, b, c) vec_nmsub(a, b, c)
101 /* IBM uses an alternative FMA definition, so -a*b-c=-(a*b+c) is "nmadd" */
102 #define gmx_simd_fnmsub_f(a, b, c) vec_nmadd(a, b, c)
103 /* gmx_simd_and_f not supported - no bitwise logical ops */
104 /* gmx_simd_andnot_f not supported - no bitwise logical ops */
105 /* gmx_simd_or_f not supported - no bitwise logical ops */
106 /* gmx_simd_xor_f not supported - no bitwise logical ops */
107 #define gmx_simd_rsqrt_f(a) vec_rsqrte(a)
108 #define gmx_simd_rcp_f(a) vec_re(a)
109 #define gmx_simd_fabs_f(a) vec_abs(a)
110 #define gmx_simd_fneg_f gmx_simd_fneg_ibm_qpx
/* max/min are built from vec_sel driven by the difference of the operands.
 * NOTE(review): presumably vec_sel returns its second argument in elements
 * where the third argument is non-negative - confirm in the QPX reference.
 */
111 #define gmx_simd_max_f(a, b) vec_sel(b, a, vec_sub(a, b))
112 #define gmx_simd_min_f(a, b) vec_sel(b, a, vec_sub(b, a))
113 /* Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
114 * of gmx_simd_round_f(), since vec_round() does not adhere to the FP control
115 * word rounding scheme. We rely on float-to-float and float-to-integer
116 * rounding being the same for half-way values in a few algorithms.
118 #define gmx_simd_round_f(a) vec_cfid(vec_ctid(a))
119 #define gmx_simd_trunc_f(a) vec_trunc(a)
120 #define gmx_simd_fraction_f(x) vec_sub(x, vec_trunc(x))
121 #define gmx_simd_get_exponent_f(a) gmx_simd_get_exponent_ibm_qpx(a)
122 #define gmx_simd_get_mantissa_f(a) gmx_simd_get_mantissa_ibm_qpx(a)
123 #define gmx_simd_set_exponent_f(a) gmx_simd_set_exponent_ibm_qpx(a)
124 /* integer datatype corresponding to float: gmx_simd_fint32_t */
125 #define gmx_simd_fint32_t vector4double
/* NOTE(review): as with gmx_simd_load_f, two definitions of gmx_simd_load_fi
 * are visible (vec_ldia and vec_ldiaa); presumably alternative branches of a
 * conditional whose directive lines are missing from this listing - confirm.
 */
127 # define gmx_simd_load_fi(m) vec_ldia(0, (int *)(m))
129 # define gmx_simd_load_fi(m) vec_ldiaa(0, (int *)(m))
131 #define gmx_simd_set1_fi(i) gmx_simd_set1_int_ibm_qpx(i)
132 #define gmx_simd_store_fi(m, x) vec_st(x, 0, (int *)(m))
133 #define gmx_simd_setzero_fi gmx_simd_setzero_ibm_qpx
134 #define gmx_simd_cvt_f2i(a) vec_ctiw(a)
135 #define gmx_simd_cvtt_f2i(a) vec_ctiwz(a)
136 #define gmx_simd_cvt_i2f(a) vec_cfid(a)
137 /* Integer simd extract not available */
138 /* Integer logical ops on gmx_simd_fint32_t not supported */
139 /* Integer arithmetic ops on gmx_simd_fint32_t not supported */
140 /* Boolean & comparison operations on gmx_simd_float_t */
141 #define gmx_simd_fbool_t vector4double
142 #define gmx_simd_cmpeq_f(a, b) vec_cmpeq(a, b)
143 #define gmx_simd_cmplt_f(a, b) vec_cmplt((a), (b))
/* less-or-equal is composed as (a == b) OR (a < b) */
144 #define gmx_simd_cmple_f(a, b) gmx_simd_or_fb(vec_cmpeq(a, b), vec_cmplt(a, b))
145 #define gmx_simd_and_fb(a, b) vec_and(a, b)
146 #define gmx_simd_or_fb(a, b) vec_or(a, b)
147 #define gmx_simd_anytrue_fb(a) gmx_simd_anytrue_bool_ibm_qpx(a)
/* The blends pick between the value and a splatted 0.0 with vec_sel;
 * blendnotzero swaps the two value operands relative to blendzero.
 */
148 #define gmx_simd_blendzero_f(a, sel) vec_sel(vec_splats(0.0), a, sel)
149 #define gmx_simd_blendnotzero_f(a, sel) vec_sel(a, vec_splats(0.0), sel)
150 #define gmx_simd_blendv_f(a, b, sel) vec_sel(a, b, sel)
151 #define gmx_simd_reduce_f(a) gmx_simd_reduce_ibm_qpx(a)
154 /* Boolean & comparison operations on gmx_simd_fint32_t not supported */
155 /* Conversions between different booleans not supported */
157 static __attribute__((always_inline)) vector4double
158 gmx_simd_fneg_ibm_qpx(vector4double a)
162 /****************************************************
163 * DOUBLE PRECISION SIMD IMPLEMENTATION *
164 ****************************************************/
/* Double-precision wrappers. Each gmx_simd_*_d name expands either to a QPX
 * vec_* intrinsic or to one of the *_ibm_qpx helper functions of this file;
 * the double vector, its int32 companion and its boolean type are all
 * vector4double.
 */
165 #define gmx_simd_double_t vector4double
/* NOTE(review): two definitions of gmx_simd_load_d/gmx_simd_store_d are
 * visible (vec_ld/vec_st, then vec_lda/vec_sta). The embedded numbering skips
 * lines around them, so they are presumably the two branches of a
 * preprocessor conditional that is not shown here - confirm.
 */
167 # define gmx_simd_load_d(m) vec_ld(0, (double *)(m))
168 # define gmx_simd_store_d(m, a) vec_st(a, 0, (double *)(m))
170 # define gmx_simd_load_d(m) vec_lda(0, (double *)(m))
171 # define gmx_simd_store_d(m, a) vec_sta(a, 0, (double *)(m))
173 # define gmx_simd_load1_d(m) vec_lds(0, (double *)(m))
174 #define gmx_simd_set1_d(x) vec_splats(x)
175 /* No support for unaligned load/store */
176 #define gmx_simd_setzero_d gmx_simd_setzero_ibm_qpx
177 #define gmx_simd_add_d(a, b) vec_add(a, b)
178 #define gmx_simd_sub_d(a, b) vec_sub(a, b)
179 #define gmx_simd_mul_d(a, b) vec_mul(a, b)
180 #define gmx_simd_fmadd_d(a, b, c) vec_madd(a, b, c)
181 #define gmx_simd_fmsub_d(a, b, c) vec_msub(a, b, c)
182 /* IBM uses an alternative FMA definition, so -a*b+c=-(a*b-c) is "nmsub" */
183 #define gmx_simd_fnmadd_d(a, b, c) vec_nmsub(a, b, c)
184 /* IBM uses an alternative FMA definition, so -a*b-c=-(a*b+c) is "nmadd" */
185 #define gmx_simd_fnmsub_d(a, b, c) vec_nmadd(a, b, c)
186 /* gmx_simd_and_d not supported - no bitwise logical ops */
187 /* gmx_simd_andnot_d not supported - no bitwise logical ops */
188 /* gmx_simd_or_d not supported - no bitwise logical ops */
189 /* gmx_simd_xor_d not supported - no bitwise logical ops */
190 #define gmx_simd_rsqrt_d(a) vec_rsqrte(a)
191 #define gmx_simd_rcp_d(a) vec_re(a)
192 #define gmx_simd_fabs_d(a) vec_abs(a)
193 #define gmx_simd_fneg_d gmx_simd_fneg_ibm_qpx
/* max/min are built from vec_sel driven by the difference of the operands.
 * NOTE(review): presumably vec_sel returns its second argument in elements
 * where the third argument is non-negative - confirm in the QPX reference.
 */
194 #define gmx_simd_max_d(a, b) vec_sel(b, a, vec_sub(a, b))
195 #define gmx_simd_min_d(a, b) vec_sel(b, a, vec_sub(b, a))
196 /* Note: It is critical to use vec_cfid(vec_ctid(a)) for the implementation
197 * of gmx_simd_round_d(), since vec_round() does not adhere to the FP control
198 * word rounding scheme. We rely on float-to-float and float-to-integer
199 * rounding being the same for half-way values in a few algorithms.
201 #define gmx_simd_round_d(a) vec_cfid(vec_ctid(a))
202 #define gmx_simd_trunc_d(a) vec_trunc(a)
203 #define gmx_simd_fraction_d(x) vec_sub(x, vec_trunc(x))
204 #define gmx_simd_get_exponent_d(a) gmx_simd_get_exponent_ibm_qpx(a)
205 #define gmx_simd_get_mantissa_d(a) gmx_simd_get_mantissa_ibm_qpx(a)
206 #define gmx_simd_set_exponent_d(a) gmx_simd_set_exponent_ibm_qpx(a)
207 /* integer datatype corresponding to double: gmx_simd_dint32_t */
208 #define gmx_simd_dint32_t vector4double
/* NOTE(review): as with gmx_simd_load_d, two definitions of gmx_simd_load_di
 * are visible (vec_ldia and vec_ldiaa); presumably alternative branches of a
 * conditional whose directive lines are missing from this listing - confirm.
 */
210 # define gmx_simd_load_di(m) vec_ldia(0, (int *)(m))
212 # define gmx_simd_load_di(m) vec_ldiaa(0, (int *)(m))
214 #define gmx_simd_set1_di(i) gmx_simd_set1_int_ibm_qpx(i)
215 #define gmx_simd_store_di(m, x) vec_st(x, 0, (int *)(m))
216 #define gmx_simd_setzero_di gmx_simd_setzero_ibm_qpx
217 #define gmx_simd_cvt_d2i(a) vec_ctiw(a)
218 #define gmx_simd_cvtt_d2i(a) vec_ctiwz(a)
219 #define gmx_simd_cvt_i2d(a) vec_cfid(a)
220 /* Integer simd extract not available */
221 /* Integer logical ops on gmx_simd_dint32_t not supported */
222 /* Integer arithmetic ops on gmx_simd_dint32_t not supported */
223 /* Boolean & comparison operations on gmx_simd_double_t */
224 #define gmx_simd_dbool_t vector4double
225 #define gmx_simd_cmpeq_d(a, b) vec_cmpeq(a, b)
226 #define gmx_simd_cmplt_d(a, b) vec_cmplt((a), (b))
/* less-or-equal is composed as (a == b) OR (a < b), combined with the
 * double-precision boolean OR so this section does not lean on the float API.
 */
227 #define gmx_simd_cmple_d(a, b) gmx_simd_or_db(vec_cmpeq(a, b), vec_cmplt(a, b))
228 #define gmx_simd_and_db(a, b) vec_and(a, b)
229 #define gmx_simd_or_db(a, b) vec_or(a, b)
230 #define gmx_simd_anytrue_db(a) gmx_simd_anytrue_bool_ibm_qpx(a)
/* The blends pick between the value and a splatted 0.0 with vec_sel;
 * blendnotzero swaps the two value operands relative to blendzero.
 */
231 #define gmx_simd_blendzero_d(a, sel) vec_sel(vec_splats(0.0), a, sel)
232 #define gmx_simd_blendnotzero_d(a, sel) vec_sel(a, vec_splats(0.0), sel)
233 #define gmx_simd_blendv_d(a, b, sel) vec_sel(a, b, sel)
234 #define gmx_simd_reduce_d(a) gmx_simd_reduce_ibm_qpx(a)
236 /* Boolean & comparison operations on gmx_simd_dint32_t not supported */
237 /* Conversions between different booleans not supported */
240 /****************************************************
241 * IMPLEMENTATION HELPER FUNCTIONS *
242 ****************************************************/
/* Returns vec_splats(0.0). This single helper backs gmx_simd_setzero_f,
 * gmx_simd_setzero_d, gmx_simd_setzero_fi and gmx_simd_setzero_di.
 */
243 static __attribute__((always_inline)) vector4double gmx_simdcall
244 gmx_simd_setzero_ibm_qpx(void)
246 return vec_splats(0.0);
/* Backs gmx_simd_get_exponent_f/_d. For each of the four 64-bit words in
 * idata the exponent field (bits 52..62) is masked out, shifted down to
 * bit 0 and reduced by the bias 1023; the resulting integers are reloaded
 * and passed through vec_cfid to give the returned vector.
 * NOTE(review): the statement that stores x into idata is not visible in
 * this listing (the embedded numbering jumps from 256 to 258) - confirm it
 * is present in the real file, otherwise idata is read uninitialized.
 */
249 static __attribute__((always_inline)) vector4double gmx_simdcall
250 gmx_simd_get_exponent_ibm_qpx(vector4double x)
252 const gmx_int64_t expmask = 0x7ff0000000000000LL;
253 const gmx_int64_t expbase = 1023;
254 gmx_int64_t idata[4] __attribute__((aligned(32)));
256 /* Store to memory */
258 /* Perform integer arithmetics in general registers. */
259 idata[0] = ((idata[0] & expmask) >> 52) - expbase;
260 idata[1] = ((idata[1] & expmask) >> 52) - expbase;
261 idata[2] = ((idata[2] & expmask) >> 52) - expbase;
262 idata[3] = ((idata[3] & expmask) >> 52) - expbase;
263 /* Reload from memory */
264 return vec_cfid(vec_ld(0, idata));
/* Backs gmx_simd_get_mantissa_f/_d. For each of the four 64-bit words in
 * idata the top 12 bits (sign and exponent, per exp_and_sign_mask) are
 * cleared and replaced by 0x3ff0000000000000, the bit pattern of 1.0 as an
 * IEEE-754 double; the words are then reloaded as the returned vector.
 * NOTE(review): the statement that stores x into idata is not visible in
 * this listing (the embedded numbering jumps from 274 to 276) - confirm it
 * is present in the real file, otherwise idata is read uninitialized.
 */
267 static __attribute__((always_inline)) vector4double gmx_simdcall
268 gmx_simd_get_mantissa_ibm_qpx(vector4double x)
270 const gmx_int64_t exp_and_sign_mask = 0xfff0000000000000LL;
271 const gmx_int64_t ione = 0x3ff0000000000000LL;
272 gmx_int64_t idata[4] __attribute__((aligned(32)));
274 /* Store to memory */
276 /* Perform integer arithmetics in general registers. */
277 idata[0] = (idata[0] & (~exp_and_sign_mask)) | ione;
278 idata[1] = (idata[1] & (~exp_and_sign_mask)) | ione;
279 idata[2] = (idata[2] & (~exp_and_sign_mask)) | ione;
280 idata[3] = (idata[3] & (~exp_and_sign_mask)) | ione;
281 /* Reload from memory */
282 return vec_ld(0, idata);
/* Backs gmx_simd_set_exponent_f/_d. x is converted with vec_ctid and stored
 * to idata; each 64-bit word n is then replaced by (n + 1023) << 52 and the
 * words are reloaded as the returned vector.
 * NOTE(review): presumably this yields 2^n per element (biased exponent
 * placed in the IEEE-754 exponent field, zero sign and mantissa) - confirm.
 * The left shift operates on a signed gmx_int64_t, so it assumes
 * n + 1023 is non-negative and fits in the 11-bit exponent field.
 */
285 static __attribute__((always_inline)) vector4double gmx_simdcall
286 gmx_simd_set_exponent_ibm_qpx(vector4double x)
288 const gmx_int64_t expbase = 1023;
289 gmx_int64_t idata[4] __attribute__((aligned(32)));
291 /* Store to memory for shifts. It is REALLY critical that we use the same
292 * rounding mode as for gmx_simd_round_r() here. In particular, for QPX
293 * this means we implement gmx_simd_round_r(a) as vec_cfid(vec_ctid(a)),
294 * since vec_round() uses a different rounding scheme.
296 vec_st(vec_ctid(x), 0, idata);
297 /* Perform integer arithmetics in general registers. */
298 idata[0] = (idata[0] + expbase) << 52;
299 idata[1] = (idata[1] + expbase) << 52;
300 idata[2] = (idata[2] + expbase) << 52;
301 idata[3] = (idata[3] + expbase) << 52;
302 /* Reload from memory */
303 return vec_ld(0, idata);
/* Backs gmx_simd_reduce_f/_d; returns element 0 of y as a double.
 * NOTE(review): only part of the body is visible - the declaration of z and
 * the statements between the two vec_sldw calls and before the return are
 * missing from this listing (the embedded numbering skips 310-312 and 314).
 * Presumably they add the shifted copies so that element 0 ends up holding
 * the sum of all four elements of x - confirm against the real file.
 */
306 static __attribute__((always_inline)) double gmx_simdcall
307 gmx_simd_reduce_ibm_qpx(vector4double x)
309 vector4double y = vec_sldw(x, x, 2);
313 z = vec_sldw(y, y, 1);
315 return vec_extract(y, 0);
/* Backs gmx_simd_set1_fi/_di. Loads idata with vec_ldia and returns
 * vec_splat of element 0 of the loaded vector.
 * NOTE(review): the statements that write i into idata are not visible in
 * this listing (the embedded numbering skips 322-324) - presumably idata[0]
 * is set to i there; confirm against the real file.
 */
318 static __attribute__((always_inline)) vector4double gmx_simdcall
319 gmx_simd_set1_int_ibm_qpx(int i)
321 int idata[4] __attribute__((aligned(32)));
325 /* Reload from memory */
326 return vec_splat(vec_ldia(0, idata), 0);
/* Backs gmx_simd_anytrue_fb/_db; returns the int result of testing whether
 * element 0 of a is greater than 0.
 * NOTE(review): the statements that combine a with the vec_sldw copies in b
 * are missing from this listing (the embedded numbering skips 334-335 and
 * 337). Presumably a is OR-ed with b after each shift so element 0 reflects
 * all four elements - confirm against the real file.
 */
329 /* This works in both single and double */
330 static __attribute__((always_inline)) int gmx_simdcall
331 gmx_simd_anytrue_bool_ibm_qpx(vector4double a)
333 vector4double b = vec_sldw(a, a, 2);
336 b = vec_sldw(a, a, 1);
338 return (vec_extract(a, 0) > 0);
341 /* QPX is already 4-wide both in single and double, so just reuse for SIMD4 */
/* SIMD4 single-precision names are plain aliases of the gmx_simd_*_f /
 * gmx_simd_*_fi wrappers, except gmx_simd4_dotproduct3_f, which maps to a
 * dedicated helper function.
 * NOTE(review): several alias targets (gmx_simd_loadu_f, gmx_simd_storeu_f,
 * gmx_simd_and_f, gmx_simd_andnot_f, gmx_simd_or_f, gmx_simd_xor_f,
 * gmx_simd_load1_fi, gmx_simd_loadu_fi, gmx_simd_storeu_fi) have no visible
 * definition in this listing; those aliases presumably only work as long as
 * they are never expanded - confirm.
 */
344 #define gmx_simd4_float_t gmx_simd_float_t
345 #define gmx_simd4_load_f gmx_simd_load_f
346 #define gmx_simd4_load1_f gmx_simd_load1_f
347 #define gmx_simd4_set1_f gmx_simd_set1_f
348 #define gmx_simd4_store_f gmx_simd_store_f
349 #define gmx_simd4_loadu_f gmx_simd_loadu_f
350 #define gmx_simd4_storeu_f gmx_simd_storeu_f
351 #define gmx_simd4_setzero_f gmx_simd_setzero_f
352 #define gmx_simd4_add_f gmx_simd_add_f
353 #define gmx_simd4_sub_f gmx_simd_sub_f
354 #define gmx_simd4_mul_f gmx_simd_mul_f
355 #define gmx_simd4_fmadd_f gmx_simd_fmadd_f
356 #define gmx_simd4_fmsub_f gmx_simd_fmsub_f
357 #define gmx_simd4_fnmadd_f gmx_simd_fnmadd_f
358 #define gmx_simd4_fnmsub_f gmx_simd_fnmsub_f
359 #define gmx_simd4_and_f gmx_simd_and_f
360 #define gmx_simd4_andnot_f gmx_simd_andnot_f
361 #define gmx_simd4_or_f gmx_simd_or_f
362 #define gmx_simd4_xor_f gmx_simd_xor_f
363 #define gmx_simd4_rsqrt_f gmx_simd_rsqrt_f
364 #define gmx_simd4_rcp_f gmx_simd_rcp_f
365 #define gmx_simd4_fabs_f gmx_simd_fabs_f
366 #define gmx_simd4_fneg_f gmx_simd_fneg_f
367 #define gmx_simd4_max_f gmx_simd_max_f
368 #define gmx_simd4_min_f gmx_simd_min_f
369 #define gmx_simd4_round_f gmx_simd_round_f
370 #define gmx_simd4_trunc_f gmx_simd_trunc_f
371 #define gmx_simd4_fraction_f gmx_simd_fraction_f
372 #define gmx_simd4_get_exponent_f gmx_simd_get_exponent_f
373 #define gmx_simd4_get_mantissa_f gmx_simd_get_mantissa_f
374 #define gmx_simd4_set_exponent_f gmx_simd_set_exponent_f
375 #define gmx_simd4_dotproduct3_f gmx_simd4_dotproduct3_f_ibm_qpx
376 #define gmx_simd4_fint32_t gmx_simd_fint32_t
377 #define gmx_simd4_load_fi gmx_simd_load_fi
378 #define gmx_simd4_load1_fi gmx_simd_load1_fi
379 #define gmx_simd4_set1_fi gmx_simd_set1_fi
380 #define gmx_simd4_store_fi gmx_simd_store_fi
381 #define gmx_simd4_loadu_fi gmx_simd_loadu_fi
382 #define gmx_simd4_storeu_fi gmx_simd_storeu_fi
383 #define gmx_simd4_setzero_fi gmx_simd_setzero_fi
384 #define gmx_simd4_cvt_f2i gmx_simd_cvt_f2i
385 #define gmx_simd4_cvtt_f2i gmx_simd_cvtt_f2i
386 #define gmx_simd4_cvt_i2f gmx_simd_cvt_i2f
387 #define gmx_simd4_fbool_t gmx_simd_fbool_t
388 #define gmx_simd4_cmpeq_f gmx_simd_cmpeq_f
389 #define gmx_simd4_cmplt_f gmx_simd_cmplt_f
390 #define gmx_simd4_cmple_f gmx_simd_cmple_f
391 #define gmx_simd4_and_fb gmx_simd_and_fb
392 #define gmx_simd4_or_fb gmx_simd_or_fb
393 #define gmx_simd4_anytrue_fb gmx_simd_anytrue_fb
394 #define gmx_simd4_blendzero_f gmx_simd_blendzero_f
395 #define gmx_simd4_blendnotzero_f gmx_simd_blendnotzero_f
396 #define gmx_simd4_blendv_f gmx_simd_blendv_f
397 #define gmx_simd4_reduce_f gmx_simd_reduce_f
/* SIMD4 double-precision names are plain aliases of the gmx_simd_*_d /
 * gmx_simd_*_di wrappers, except gmx_simd4_dotproduct3_d, which maps to a
 * dedicated helper function.
 * NOTE(review): several alias targets (gmx_simd_loadu_d, gmx_simd_storeu_d,
 * gmx_simd_and_d, gmx_simd_andnot_d, gmx_simd_or_d, gmx_simd_xor_d,
 * gmx_simd_load1_di, gmx_simd_loadu_di, gmx_simd_storeu_di) have no visible
 * definition in this listing; those aliases presumably only work as long as
 * they are never expanded - confirm.
 */
399 #define gmx_simd4_double_t gmx_simd_double_t
400 #define gmx_simd4_load_d gmx_simd_load_d
401 #define gmx_simd4_load1_d gmx_simd_load1_d
402 #define gmx_simd4_set1_d gmx_simd_set1_d
403 #define gmx_simd4_store_d gmx_simd_store_d
404 #define gmx_simd4_loadu_d gmx_simd_loadu_d
405 #define gmx_simd4_storeu_d gmx_simd_storeu_d
406 #define gmx_simd4_setzero_d gmx_simd_setzero_d
407 #define gmx_simd4_add_d gmx_simd_add_d
408 #define gmx_simd4_sub_d gmx_simd_sub_d
409 #define gmx_simd4_mul_d gmx_simd_mul_d
410 #define gmx_simd4_fmadd_d gmx_simd_fmadd_d
411 #define gmx_simd4_fmsub_d gmx_simd_fmsub_d
412 #define gmx_simd4_fnmadd_d gmx_simd_fnmadd_d
413 #define gmx_simd4_fnmsub_d gmx_simd_fnmsub_d
414 #define gmx_simd4_and_d gmx_simd_and_d
415 #define gmx_simd4_andnot_d gmx_simd_andnot_d
416 #define gmx_simd4_or_d gmx_simd_or_d
417 #define gmx_simd4_xor_d gmx_simd_xor_d
418 #define gmx_simd4_rsqrt_d gmx_simd_rsqrt_d
419 #define gmx_simd4_rcp_d gmx_simd_rcp_d
420 #define gmx_simd4_fabs_d gmx_simd_fabs_d
421 #define gmx_simd4_fneg_d gmx_simd_fneg_d
422 #define gmx_simd4_max_d gmx_simd_max_d
423 #define gmx_simd4_min_d gmx_simd_min_d
424 #define gmx_simd4_round_d gmx_simd_round_d
425 #define gmx_simd4_trunc_d gmx_simd_trunc_d
426 #define gmx_simd4_fraction_d gmx_simd_fraction_d
427 #define gmx_simd4_get_exponent_d gmx_simd_get_exponent_d
428 #define gmx_simd4_get_mantissa_d gmx_simd_get_mantissa_d
429 #define gmx_simd4_set_exponent_d gmx_simd_set_exponent_d
430 #define gmx_simd4_dotproduct3_d gmx_simd4_dotproduct3_d_ibm_qpx
431 #define gmx_simd4_dint32_t gmx_simd_dint32_t
432 #define gmx_simd4_load_di gmx_simd_load_di
433 #define gmx_simd4_load1_di gmx_simd_load1_di
434 #define gmx_simd4_set1_di gmx_simd_set1_di
435 #define gmx_simd4_store_di gmx_simd_store_di
436 #define gmx_simd4_loadu_di gmx_simd_loadu_di
437 #define gmx_simd4_storeu_di gmx_simd_storeu_di
438 #define gmx_simd4_setzero_di gmx_simd_setzero_di
439 #define gmx_simd4_cvt_d2i gmx_simd_cvt_d2i
440 #define gmx_simd4_cvtt_d2i gmx_simd_cvtt_d2i
/* Integer-to-double conversion alias; gmx_simd4_cvt_i2f is already provided
 * by the single-precision alias list.
 */
441 #define gmx_simd4_cvt_i2d gmx_simd_cvt_i2d
442 #define gmx_simd4_dbool_t gmx_simd_dbool_t
443 #define gmx_simd4_cmpeq_d gmx_simd_cmpeq_d
444 #define gmx_simd4_cmplt_d gmx_simd_cmplt_d
445 #define gmx_simd4_cmple_d gmx_simd_cmple_d
446 #define gmx_simd4_and_db gmx_simd_and_db
447 #define gmx_simd4_or_db gmx_simd_or_db
448 #define gmx_simd4_anytrue_db gmx_simd_anytrue_db
449 #define gmx_simd4_blendzero_d gmx_simd_blendzero_d
450 #define gmx_simd4_blendnotzero_d gmx_simd_blendnotzero_d
451 #define gmx_simd4_blendv_d gmx_simd_blendv_d
452 #define gmx_simd4_reduce_d gmx_simd_reduce_d
/* Backs gmx_simd4_dotproduct3_d. Multiplies a and b element-wise, adds the
 * product vector to two copies of itself passed through vec_sldw with
 * offsets 1 and 2, and returns element 0 of that sum as a double.
 * NOTE(review): presumably vec_sldw(v, v, n) rotates v left by n elements,
 * so element 0 of the sum is a0*b0 + a1*b1 + a2*b2 and the fourth element
 * of the inputs does not contribute - confirm in the QPX reference.
 */
454 static __attribute__((always_inline)) double gmx_simdcall
455 gmx_simd4_dotproduct3_d_ibm_qpx(vector4double a, vector4double b)
457 vector4double dp_sh0 = vec_mul(a, b);
458 vector4double dp_sh1 = vec_sldw(dp_sh0, dp_sh0, 1);
459 vector4double dp_sh2 = vec_sldw(dp_sh0, dp_sh0, 2);
460 vector4double dp = vec_add(dp_sh2, vec_add(dp_sh0, dp_sh1));
462 return vec_extract(dp, 0);
/* Backs gmx_simd4_dotproduct3_f: delegates to the double-precision helper
 * gmx_simd4_dotproduct3_d_ibm_qpx and casts its result to float.
 */
465 static __attribute__((always_inline)) float gmx_simdcall
466 gmx_simd4_dotproduct3_f_ibm_qpx(vector4double a, vector4double b)
468 return (float)gmx_simd4_dotproduct3_d_ibm_qpx(a, b);
471 /* Function to check whether SIMD operations have resulted in overflow.
472 * For now, this is unfortunately a dummy for this architecture.
475 gmx_simd_check_and_reset_overflow(void)
480 #endif /* GMX_SIMD_IMPLEMENTATION_IBM_QPX_H */