src/gromacs/gpu_utils/gpu_vec.cuh

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #ifndef GMX_GPU_UTILS_GPU_VEC_CUH
  36 #define GMX_GPU_UTILS_GPU_VEC_CUH
  37
  38 /* Note that because of the duplicate of ivec, this header (or an
  39  * OpenCL port of it) cannot be included in a translation unit that
  40  * also includes the normal vectypes.h */
  41 #define XX 0 /* Defines for indexing in */
  42 #define YY 1 /* vectors                 */
  43 #define ZZ 2
  44 #define DIM 3 /* Dimension of vectors    */
  45 typedef int   ivec[DIM];
  46 typedef float fvec[DIM];
  47
  48 /* maths operations */
  49 /* imported from cpu versions in math/vec.h */
  50 __forceinline__ __device__ void svmul_gpu(float a, const fvec v1, fvec v2)
  51 {
  52     v2[XX] = a * v1[XX];
  53     v2[YY] = a * v1[YY];
  54     v2[ZZ] = a * v1[ZZ];
  55 }
  56
  57
  58 __forceinline__ __device__ void fvec_add_gpu(const fvec a, const fvec b, fvec c)
  59 {
  60     float x, y, z;
  61
  62     x = a[XX] + b[XX];
  63     y = a[YY] + b[YY];
  64     z = a[ZZ] + b[ZZ];
  65
  66     c[XX] = x;
  67     c[YY] = y;
  68     c[ZZ] = z;
  69 }
  70
  71 __forceinline__ __device__ void ivec_add_gpu(const ivec a, const ivec b, ivec c)
  72 {
  73     int x, y, z;
  74
  75     x = a[XX] + b[XX];
  76     y = a[YY] + b[YY];
  77     z = a[ZZ] + b[ZZ];
  78
  79     c[XX] = x;
  80     c[YY] = y;
  81     c[ZZ] = z;
  82 }
  83
  84 __forceinline__ __device__ void fvec_inc_atomic(fvec a, const fvec b)
  85 {
  86     atomicAdd(&a[XX], b[XX]);
  87     atomicAdd(&a[YY], b[YY]);
  88     atomicAdd(&a[ZZ], b[ZZ]);
  89 }
  90
  91 __forceinline__ __device__ void fvec_inc_gpu(fvec a, const fvec b)
  92 {
  93     float x, y, z;
  94
  95     x = a[XX] + b[XX];
  96     y = a[YY] + b[YY];
  97     z = a[ZZ] + b[ZZ];
  98
  99     a[XX] = x;
 100     a[YY] = y;
 101     a[ZZ] = z;
 102 }
 103
 104 __forceinline__ __device__ void fvec_dec_atomic(fvec a, const fvec b)
 105 {
 106     atomicAdd(&a[XX], -1.0f * b[XX]);
 107     atomicAdd(&a[YY], -1.0f * b[YY]);
 108     atomicAdd(&a[ZZ], -1.0f * b[ZZ]);
 109 }
 110
 111 __forceinline__ __device__ void fvec_dec_gpu(fvec a, const fvec b)
 112 {
 113     float x, y, z;
 114
 115     x = a[XX] - b[XX];
 116     y = a[YY] - b[YY];
 117     z = a[ZZ] - b[ZZ];
 118
 119     a[XX] = x;
 120     a[YY] = y;
 121     a[ZZ] = z;
 122 }
 123
 124 __forceinline__ __device__ void cprod_gpu(const fvec a, const fvec b, fvec c)
 125 {
 126     c[XX] = a[YY] * b[ZZ] - a[ZZ] * b[YY];
 127     c[YY] = a[ZZ] * b[XX] - a[XX] * b[ZZ];
 128     c[ZZ] = a[XX] * b[YY] - a[YY] * b[XX];
 129 }
 130
 131 __forceinline__ __device__ float iprod_gpu(const fvec a, const fvec b)
 132 {
 133     return (a[XX] * b[XX] + a[YY] * b[YY] + a[ZZ] * b[ZZ]);
 134 }
 135
 136 __forceinline__ __device__ float norm_gpu(const fvec a)
 137 {
 138     return sqrt(iprod_gpu(a, a));
 139 }
 140
 141 __forceinline__ __device__ float gmx_angle_gpu(const fvec a, const fvec b)
 142 {
 143     fvec  w;
 144     float wlen, s;
 145
 146     cprod_gpu(a, b, w);
 147
 148     wlen = norm_gpu(w);
 149     s    = iprod_gpu(a, b);
 150
 151     return atan2f(wlen, s); // requires float
 152 }
 153
 154 __forceinline__ __device__ void clear_ivec_gpu(ivec a)
 155 {
 156     a[XX] = 0;
 157     a[YY] = 0;
 158     a[ZZ] = 0;
 159 }
 160 __forceinline__ __device__ void fvec_sub_gpu(const fvec a, const fvec b, fvec c)
 161 {
 162     float x, y, z;
 163
 164     x = a[XX] - b[XX];
 165     y = a[YY] - b[YY];
 166     z = a[ZZ] - b[ZZ];
 167
 168     c[XX] = x;
 169     c[YY] = y;
 170     c[ZZ] = z;
 171 }
 172
 173 __forceinline__ __device__ float norm2_gpu(const fvec a)
 174 {
 175     return a[XX] * a[XX] + a[YY] * a[YY] + a[ZZ] * a[ZZ];
 176 }
 177
 178 __forceinline__ __device__ void copy_fvec_gpu(const fvec a, fvec b)
 179 {
 180     b[XX] = a[XX];
 181     b[YY] = a[YY];
 182     b[ZZ] = a[ZZ];
 183 }
 184
 185 __forceinline__ __device__ void copy_ivec_gpu(const ivec a, ivec b)
 186 {
 187     b[XX] = a[XX];
 188     b[YY] = a[YY];
 189     b[ZZ] = a[ZZ];
 190 }
 191
 192 __forceinline__ __device__ float cos_angle_gpu(const fvec a, const fvec b)
 193 {
 194     /*
 195      *                  ax*bx + ay*by + az*bz
 196      * cos-vec (a,b) =  ---------------------
 197      *                      ||a|| * ||b||
 198      */
 199     float cosval;
 200     int   m;
 201     float aa, bb, ip, ipa, ipb, ipab;
 202
 203     ip = ipa = ipb = 0.0f;
 204     for (m = 0; (m < DIM); m++)
 205     {
 206         aa = a[m];
 207         bb = b[m];
 208         ip += aa * bb;
 209         ipa += aa * aa;
 210         ipb += bb * bb;
 211     }
 212     ipab = ipa * ipb;
 213     if (ipab > 0.0f)
 214     {
 215         cosval = ip * rsqrt(ipab);
 216     }
 217     else
 218     {
 219         cosval = 1.0f;
 220     }
 221     if (cosval > 1.0f)
 222     {
 223         return 1.0f;
 224     }
 225     if (cosval < -1.0f)
 226     {
 227         return -1.0f;
 228     }
 229
 230     return cosval;
 231 }
 232
 233
 234 __device__ static inline void unitv_gpu(const fvec src, fvec dest)
 235 {
 236     float linv;
 237
 238     linv     = rsqrt(norm2_gpu(src));
 239     dest[XX] = linv * src[XX];
 240     dest[YY] = linv * src[YY];
 241     dest[ZZ] = linv * src[ZZ];
 242 }
 243
 244 #endif