include/gmx_avx_double.h

   1 /*
   2  *                This source code is part of
   3  *
   4  *                 G   R   O   M   A   C   S
   5  *
   6  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   7  * Copyright (c) 2001-2012, The GROMACS Development Team
   8  *
   9  * Gromacs is a library for molecular simulation and trajectory analysis,
  10  * written by Erik Lindahl, David van der Spoel, Berk Hess, and others - for
  11  * a full list of developers and information, check out http://www.gromacs.org
  12  *
  13  * This program is free software; you can redistribute it and/or modify it under
  14  * the terms of the GNU Lesser General Public License as published by the Free
  15  * Software Foundation; either version 2 of the License, or (at your option) any
  16  * later version.
  17  * As a special exception, you may use this file as part of a free software
  18  * library without restriction.  Specifically, if other files instantiate
  19  * templates or use macros or inline functions from this file, or you compile
  20  * this file and link it with other files to produce an executable, this
  21  * file does not by itself cause the resulting executable to be covered by
  22  * the GNU Lesser General Public License.
  23  *
  24  * In plain-speak: do not worry about classes/macros/templates either - only
  25  * changes to the library have to be LGPL, not an application linking with it.
  26  *
  27  * To help fund GROMACS development, we humbly ask that you cite
  28  * the papers people have written on it - you can find them on the website!
  29  */
  30 #ifndef _gmx_avx_double_h_
  31 #define _gmx_avx_double_h_
  32
  33 /* We require AVX now! */
  34
  35 #include <immintrin.h> /* AVX */
  36
  37 static inline __m256d
  38 gmx_mm256_invsqrt_pd(__m256d x)
  39 {
  40     /* There is no double precision AVX rsqrt instruction.
  41      * But using a single precision rsqrt still gives the full precision.
  42      */
  43     const __m256d half    = _mm256_set_pd(0.5,0.5,0.5,0.5);
  44     const __m256d three   = _mm256_set_pd(3.0,3.0,3.0,3.0);
  45
  46     __m256d lu = _mm256_cvtps_pd(_mm_rsqrt_ps(_mm256_cvtpd_ps(x)));
  47
  48     lu = _mm256_mul_pd(half,_mm256_mul_pd(_mm256_sub_pd(three,_mm256_mul_pd(_mm256_mul_pd(lu,lu),x)),lu));
  49     return _mm256_mul_pd(half,_mm256_mul_pd(_mm256_sub_pd(three,_mm256_mul_pd(_mm256_mul_pd(lu,lu),x)),lu));
  50 }
  51
  52 static inline __m256d
  53 gmx_mm256_calc_rsq_pd(__m256d dx, __m256d dy, __m256d dz)
  54 {
  55     return _mm256_add_pd( _mm256_add_pd( _mm256_mul_pd(dx,dx), _mm256_mul_pd(dy,dy) ), _mm256_mul_pd(dz,dz) );
  56 }
  57
  58 /* Normal sum of four xmm registers */
  59 #define gmx_mm256_sum4_pd(t0,t1,t2,t3)  _mm256_add_pd(_mm256_add_pd(t0,t1),_mm256_add_pd(t2,t3))
  60
#endif /* _gmx_avx_double_h_ */