2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2012, The GROMACS Development Team
6 * Copyright (c) 2012,2013, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
38 #ifndef _gmx_simd4_ref_h_
39 #define _gmx_simd4_ref_h_
41 /* This file contains a reference plain-C implementation of 4-wide SIMD.
42 * This code is only useful for testing and documentation.
43 * Either float or double precision is supported through gmx_simd4_real,
44 * which is set in gmx_simd4_macros.h
50 /* float/double SIMD register type */
52 gmx_simd4_real r[GMX_SIMD4_WIDTH];
55 /* boolean SIMD register type */
57 char r[GMX_SIMD4_WIDTH];
61 /* Load GMX_SIMD4_WIDTH reals from memory starting at r */
62 static gmx_inline gmx_simd4_ref_pr
63 gmx_simd4_ref_load_pr(const gmx_simd4_real *r)
68 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
76 /* Set all SIMD register elements to r */
77 static gmx_inline gmx_simd4_ref_pr
78 gmx_simd4_ref_set1_pr(gmx_simd4_real r)
83 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
91 /* Set all SIMD register elements to 0 */
92 static gmx_inline gmx_simd4_ref_pr
93 gmx_simd4_ref_setzero_pr()
98 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
106 static gmx_inline void
107 gmx_simd4_ref_store_pr(gmx_simd4_real *dest, gmx_simd4_ref_pr src)
111 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
117 static gmx_inline gmx_simd4_ref_pr
118 gmx_simd4_ref_add_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
123 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
125 c.r[i] = a.r[i] + b.r[i];
131 static gmx_inline gmx_simd4_ref_pr
132 gmx_simd4_ref_sub_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
137 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
139 c.r[i] = a.r[i] - b.r[i];
145 static gmx_inline gmx_simd4_ref_pr
146 gmx_simd4_ref_mul_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
151 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
153 c.r[i] = a.r[i]*b.r[i];
159 static gmx_inline gmx_simd4_ref_pr
160 gmx_simd4_ref_madd_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b, gmx_simd4_ref_pr c)
165 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
167 d.r[i] = a.r[i]*b.r[i] + c.r[i];
173 static gmx_inline gmx_simd4_ref_pr
174 gmx_simd4_ref_nmsub_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b, gmx_simd4_ref_pr c)
179 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
181 d.r[i] = -a.r[i]*b.r[i] + c.r[i];
187 static gmx_inline gmx_simd4_real
188 gmx_simd4_ref_dotproduct3(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
194 for (i = 0; i < 3; i++)
202 static gmx_inline gmx_simd4_ref_pr
203 gmx_simd4_ref_min_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
208 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
210 c.r[i] = (a.r[i] <= b.r[i] ? a.r[i] : b.r[i]);
216 static gmx_inline gmx_simd4_ref_pr
217 gmx_simd4_ref_max_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
222 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
224 c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
230 static gmx_inline gmx_simd4_ref_pr
231 gmx_simd4_ref_blendzero_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pb b)
236 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
238 c.r[i] = (b.r[i] ? a.r[i] : 0.0);
245 static gmx_inline gmx_simd4_ref_pb
246 gmx_simd4_ref_cmplt_pr(gmx_simd4_ref_pr a, gmx_simd4_ref_pr b)
251 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
253 c.r[i] = (a.r[i] < b.r[i]);
259 /* Logical AND on SIMD booleans */
260 static gmx_inline gmx_simd4_ref_pb
261 gmx_simd4_ref_and_pb(gmx_simd4_ref_pb a, gmx_simd4_ref_pb b)
266 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
268 c.r[i] = (a.r[i] && b.r[i]);
274 /* Logical OR on SIMD booleans */
275 static gmx_inline gmx_simd4_ref_pb
276 gmx_simd4_ref_or_pb(gmx_simd4_ref_pb a, gmx_simd4_ref_pb b)
281 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
283 c.r[i] = (a.r[i] || b.r[i]);
289 /* gmx_simd4_ref_anytrue_pb(a) returns whether any of the booleans in a is true */
290 static gmx_inline int
291 gmx_simd4_ref_anytrue_pb(gmx_simd4_ref_pb a)
297 for (i = 0; i < GMX_SIMD4_WIDTH; i++)
308 #endif /* _gmx_simd4_ref_h_ */