src/gromacs/simd/impl_x86_avx_512_knl/impl_x86_avx_512_knl_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_X86_AVX_512_KNL_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPL_X86_AVX_512_KNL_SIMD_FLOAT_H
  38
  39 #include "config.h"
  40
  41 #include <immintrin.h>
  42
  43 #include "gromacs/math/utilities.h"
  44 #include "gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_simd_float.h"
  45
  46 namespace gmx
  47 {
  48
  49 static inline SimdFloat gmx_simdcall
  50 rsqrt(SimdFloat x)
  51 {
  52     return {
  53                _mm512_rsqrt28_ps(x.simdInternal_)
  54     };
  55 }
  56
  57 static inline SimdFloat gmx_simdcall
  58 rcp(SimdFloat x)
  59 {
  60     return {
  61                _mm512_rcp28_ps(x.simdInternal_)
  62     };
  63 }
  64
  65 static inline SimdFloat gmx_simdcall
  66 maskzRsqrt(SimdFloat x, SimdFBool m)
  67 {
  68     return {
  69                _mm512_maskz_rsqrt28_ps(m.simdInternal_, x.simdInternal_)
  70     };
  71 }
  72
  73 static inline SimdFloat gmx_simdcall
  74 maskzRcp(SimdFloat x, SimdFBool m)
  75 {
  76     return {
  77                _mm512_maskz_rcp28_ps(m.simdInternal_, x.simdInternal_)
  78     };
  79 }
  80
  81 template <MathOptimization opt = MathOptimization::Safe>
  82 static inline SimdFloat gmx_simdcall
  83 exp2(SimdFloat x)
  84 {
  85     return {
  86                _mm512_exp2a23_ps(x.simdInternal_)
  87     };
  88 }
  89
  90 template <MathOptimization opt = MathOptimization::Safe>
  91 static inline SimdFloat gmx_simdcall
  92 exp(SimdFloat x)
  93 {
  94     const __m512     argscale    = _mm512_set1_ps(1.44269504088896341f);
  95     const __m512     invargscale = _mm512_set1_ps(-0.69314718055994528623f);
  96
  97     if (opt == MathOptimization::Safe)
  98     {
  99         // Set the limit to gurantee flush to zero
 100         const SimdFloat smallArgLimit(-88.f);
 101         // Since we multiply the argument by 1.44, for the safe version we need to make
 102         // sure this doesn't result in overflow
 103         x = max(x, smallArgLimit);
 104     }
 105
 106     __m512 xscaled = _mm512_mul_ps(x.simdInternal_, argscale);
 107     __m512 r       = _mm512_exp2a23_ps(xscaled);
 108
 109     // exp2a23_ps provides 23 bits of accuracy, but we ruin some of that with our argument
 110     // scaling. To correct this, we find the difference between the scaled argument and
 111     // the true one (extended precision arithmetics does not appear to be necessary to
 112     // fulfill our accuracy requirements) and then multiply by the exponent of this
 113     // correction since exp(a+b)=exp(a)*exp(b).
 114     // Note that this only adds two instructions (and maybe some constant loads).
 115
 116     // find the difference
 117     x         = _mm512_fmadd_ps(invargscale, xscaled, x.simdInternal_);
 118     // x will now be a _very_ small number, so approximate exp(x)=1+x.
 119     // We should thus apply the correction as r'=r*(1+x)=r+r*x
 120     r         = _mm512_fmadd_ps(r, x.simdInternal_, r);
 121     return {
 122                r
 123     };
 124 }
 125
 126 }      // namespace gmx
 127
 128 #endif // GMX_SIMD_IMPL_X86_AVX_512_KNL_SIMD_FLOAT_H