2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #ifndef GMX_GPU_UTILS_GPU_VEC_CUH
36 #define GMX_GPU_UTILS_GPU_VEC_CUH
38 /* Note that because of the duplicate of ivec, this header (or an
39 * OpenCL port of it) cannot be included in a translation unit that
40 * also includes the normal vectypes.h */
41 #define XX 0 /* Defines for indexing in */
42 #define YY 1 /* vectors */
44 #define DIM 3 /* Dimension of vectors */
45 typedef int ivec[DIM];
46 typedef float fvec[DIM];
48 /* maths operations */
49 /* imported from cpu versions in math/vec.h */
50 __forceinline__ __device__ void svmul_gpu(float a, const fvec v1, fvec v2)
58 __forceinline__ __device__ void fvec_add_gpu(const fvec a, const fvec b, fvec c)
71 __forceinline__ __device__ void ivec_add_gpu(const ivec a, const ivec b, ivec c)
/* Atomically accumulate b into a, one component at a time.
 *
 * Each of the three components is updated with its own atomicAdd call,
 * so the vector as a whole is not updated as a single atomic unit.
 *
 * a  vector that is incremented in place
 * b  vector whose components are added to a (read only)
 */
__forceinline__ __device__ void fvec_inc_atomic(fvec a, const fvec b)
{
    atomicAdd(a + XX, b[XX]);
    atomicAdd(a + YY, b[YY]);
    atomicAdd(a + ZZ, b[ZZ]);
}
91 __forceinline__ __device__ void fvec_inc_gpu(fvec a, const fvec b)
/* Atomically subtract b from a, one component at a time.
 *
 * The subtraction is expressed as an atomicAdd of the negated component.
 * Each component is updated with its own atomicAdd call, so the vector
 * as a whole is not updated as a single atomic unit.
 *
 * a  vector that is decremented in place
 * b  vector whose components are subtracted from a (read only)
 */
__forceinline__ __device__ void fvec_dec_atomic(fvec a, const fvec b)
{
    atomicAdd(a + XX, -b[XX]);
    atomicAdd(a + YY, -b[YY]);
    atomicAdd(a + ZZ, -b[ZZ]);
}
111 __forceinline__ __device__ void fvec_dec_gpu(fvec a, const fvec b)
/* Cross product: stores a x b in c.
 *
 * All three components are evaluated into locals before anything is
 * written to c, so the result is also correct when c refers to the same
 * storage as a or b (in-place use). Writing c[XX] first would otherwise
 * corrupt the inputs still needed for c[YY] and c[ZZ].
 *
 * a, b  input vectors (read only)
 * c     output vector, receives a x b
 */
__forceinline__ __device__ void cprod_gpu(const fvec a, const fvec b, fvec c)
{
    const float cx = a[YY] * b[ZZ] - a[ZZ] * b[YY];
    const float cy = a[ZZ] * b[XX] - a[XX] * b[ZZ];
    const float cz = a[XX] * b[YY] - a[YY] * b[XX];

    c[XX] = cx;
    c[YY] = cy;
    c[ZZ] = cz;
}
/* Inner (dot) product of two vectors.
 *
 * a, b  input vectors (read only)
 *
 * Returns a[XX]*b[XX] + a[YY]*b[YY] + a[ZZ]*b[ZZ].
 */
__forceinline__ __device__ float iprod_gpu(const fvec a, const fvec b)
{
    const float xTerm = a[XX] * b[XX];
    const float yTerm = a[YY] * b[YY];
    const float zTerm = a[ZZ] * b[ZZ];

    /* Same left-to-right summation order as a single x + y + z expression */
    return xTerm + yTerm + zTerm;
}
/* Length of a vector: the square root of its inner product with itself.
 *
 * a  input vector (read only)
 */
__forceinline__ __device__ float norm_gpu(const fvec a)
{
    const float lengthSquared = iprod_gpu(a, a);

    /* Explicit single-precision square root */
    return sqrtf(lengthSquared);
}
141 __forceinline__ __device__ float gmx_angle_gpu(const fvec a, const fvec b)
151 return atan2f(wlen, s); // requires float
154 __forceinline__ __device__ void clear_ivec_gpu(ivec a)
160 __forceinline__ __device__ void fvec_sub_gpu(const fvec a, const fvec b, fvec c)
/* Squared length of a vector: the sum of the squares of its three
 * components. No square root is taken.
 *
 * a  input vector (read only)
 */
__forceinline__ __device__ float norm2_gpu(const fvec a)
{
    /* The inner product of a with itself is exactly the sum of squares */
    return iprod_gpu(a, a);
}
178 __forceinline__ __device__ void copy_fvec_gpu(const fvec a, fvec b)
185 __forceinline__ __device__ void copy_ivec_gpu(const ivec a, ivec b)
192 __forceinline__ __device__ float cos_angle_gpu(const fvec a, const fvec b)
195 * ax*bx + ay*by + az*bz
196 * cos-vec (a,b) = ---------------------
201 float aa, bb, ip, ipa, ipb, ipab;
203 ip = ipa = ipb = 0.0f;
204 for (m = 0; (m < DIM); m++)
215 cosval = ip * rsqrt(ipab);
234 __device__ static inline void unitv_gpu(const fvec src, fvec dest)
238 linv = rsqrt(norm2_gpu(src));
239 dest[XX] = linv * src[XX];
240 dest[YY] = linv * src[YY];
241 dest[ZZ] = linv * src[ZZ];