src/gromacs/simd/impl_sparc64_hpc_ace/impl_sparc64_hpc_ace_simd_float.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_FLOAT_H
  37 #define GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_FLOAT_H
  38
  39 /* Fujitsu header borrows the name from SSE2, since some instructions have aliases.
  40  * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is
  41  * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in
  42  * turn causes all intrinsics to be declared inline _instead_ of static. This
  43  * leads to duplicate symbol errors at link time.
  44  * To work around this we unset this before including the HPC-ACE header, and
  45  * reset the value afterwards.
  46  */
  47 #ifdef _ISOC99_SOURCE
  48 #    undef _ISOC99_SOURCE
  49 #    define SAVE_ISOC99_SOURCE
  50 #endif
  51
  52 #include <emmintrin.h>
  53
  54 #ifdef SAVE_ISOC99_SOURCE
  55 #    define _ISOC99_SOURCE
  56 #    undef SAVE_ISOC99_SOURCE
  57 #endif
  58
  59 #include <math.h>
  60
  61 #include "impl_sparc64_hpc_ace_common.h"
  62
  63 /* HPC-ACE is a bit strange; some instructions like
  64  * shifts only work on _integer_ versions of SIMD
  65  * registers, but there are no intrinsics to load
  66  * or convert, or even to cast. The only way to use
  67  * them is to declare unions with the SIMD integer
  68  * type. However, this will lead to extra load ops,
  69  * and the normal real-to-int and int-to-real
  70  * conversions work purely on the v2r8 fp regs.
  71  * Since our most common usage is to convert and
  72  * then extract the result for table lookups, we
  73  * define the SimdFInt32 datatype to use
  74  * the v2r8 rather than v2i8 SIMD type.
  75  */
  76
  77 /****************************************************
  78  *      SINGLE PRECISION SIMD IMPLEMENTATION        *
  79  ****************************************************/
  80 #define SimdFloat _fjsp_v2r8
  81 #define simdLoadF simdLoadF_sparc64_hpc_ace
  82 #define simdLoad1F(m) _fjsp_set_v2r8((*m), (*m))
  83 #define simdSet1F(a) _fjsp_set_v2r8(a, a)
  84 #define simdStoreF simdStoreF_sparc64_hpc_ace
  85 #define simdLoadUF simdLoadF
  86 /* No unaligned store of SimdFloat */
  87 #define simdSetZeroF _fjsp_setzero_v2r8
  88 #define simdAddF _fjsp_add_v2r8
  89 #define simdSubF _fjsp_sub_v2r8
  90 #define simdMulF _fjsp_mul_v2r8
  91 #define simdFmaddF(a, b, c) _fjsp_madd_v2r8(a, b, c)
  92 #define simdFmsubF(a, b, c) _fjsp_msub_v2r8(a, b, c)
  93 #define simdFnmaddF(a, b, c) _fjsp_nmsub_v2r8(a, b, c)
  94 #define simdFnmsubF(a, b, c) _fjsp_nmadd_v2r8(a, b, c)
  95 #define simdAndF _fjsp_and_v2r8
  96 #define simdAndNotF _fjsp_andnot1_v2r8
  97 #define simdOrF _fjsp_or_v2r8
  98 #define simdXorF _fjsp_xor_v2r8
  99 #define simdRsqrtF _fjsp_rsqrta_v2r8
 100 #define simdRcpF _fjsp_rcpa_v2r8
 101 #define simdAbsF(x) _fjsp_abs_v2r8(x)
 102 #define simdNegF(x) _fjsp_neg_v2r8(x)
 103 #define simdMaxF _fjsp_max_v2r8
 104 #define simdMinF _fjsp_min_v2r8
 105 #define simdRoundF(x) simdRoundD(x)
 106 #define simdTruncF(x) simdTruncD(x)
 107 #define simdFractionF(x) simdSubF(x, simdTruncF(x))
 108 #define simdGetExponentF simdGetExponentD_sparc64_hpc_ace
 109 #define simdGetMantissaF simdGetMantissaD_sparc64_hpc_ace
 110 #define simdSetExponentF simdSetExponentD_sparc64_hpc_ace
 111 /* integer datatype corresponding to float: SimdFInt32 */
 112 #define SimdFInt32 _fjsp_v2r8
 113 #define simdLoadFI(m) simdLoadDI_sparc64_hpc_ace(m)
 114 #define simdSet1FI(i) simdSet1DI_sparc64_hpc_ace(i)
 115 #define simdStoreFI(m, x) simdStoreDI_sparc64_hpc_ace(m, x)
 116 #define simdLoadUFI simdLoadFI
 117 /* No unaligned store of SimdFInt32 */
 118 #define simdSetZeroFI _fjsp_setzero_v2r8
 119 #define simdCvtF2I simdCvtD2I
 120 #define simdCvttF2I _fjsp_dtox_v2r8
 121 #define simdCvtI2F _fjsp_xtod_v2r8
 122 #define simdExtractFI simdExtractDI_sparc64_hpc_ace
 123 /* Integer logical ops on SimdFInt32 */
 124 /* Shifts are horrible since they require memory re-loads. */
 125 #define simdSlliFI simdSlliDI_sparc64_hpc_ace
 126 #define simdSrliFI simdSrliDI_sparc64_hpc_ace
 127 #define simdAndFI _fjsp_and_v2r8
 128 #define simdAndNotFI(a, b) _fjsp_andnot1_v2r8(a, b)
 129 #define simdOrFI _fjsp_or_v2r8
 130 #define simdXorFI _fjsp_xor_v2r8
 131 /* No integer arithmetic ops on SimdFInt32 */
 132 /* Boolean & comparison operations on SimdFloat */
 133 #define SimdFBool _fjsp_v2r8
 134 #define simdCmpEqF _fjsp_cmpeq_v2r8
 135 #define simdCmpLtF _fjsp_cmplt_v2r8
 136 #define simdCmpLeF _fjsp_cmple_v2r8
 137 #define simdAndFB _fjsp_and_v2r8
 138 #define simdOrFB _fjsp_or_v2r8
 139 #define simdAnyTrueFB gmx_simd_anytrue_d_sparc64_hpc_ace
 140 #define simdMaskF _fjsp_and_v2r8
 141 #define simdMaskNotF(a, sel) _fjsp_andnot1_v2r8(sel, a)
 142 #define simdBlendF(a, b, s) _fjsp_selmov_v2r8(b, a, s)
 143 #define simdReduceF(a) simdReduceD_sparc64_hpc_ace(a)
 144 /* No boolean & comparison operations on SimdFInt32 */
 145 /* No conversions between different booleans */
 146
 147 /****************************************************
 148  * SINGLE PRECISION IMPLEMENTATION HELPER FUNCTIONS *
 149  ****************************************************/
 150 static inline SimdFloat simdLoadF_sparc64_hpc_ace(const float* m)
 151 {
 152     /* We are not allowed to cast single-to-double registers, but we can
 153      * masquerade the memory location as a variable of type _fjsp_v2r4.
 154      */
 155     const _fjsp_v2r4* p = (const _fjsp_v2r4*)m;
 156     _fjsp_v2r4        simd;
 157
 158     simd = *p;
 159     return _fjsp_stod_v2r8(simd);
 160 }
 161
 162 static inline void simdStoreF_sparc64_hpc_ace(float* m, SimdFloat x)
 163 {
 164     /* We are not allowed to cast single-to-double registers, but we can
 165      * masquerade the memory location as a variable of type _fjsp_v2r4.
 166      */
 167     _fjsp_v2r4* p = (_fjsp_v2r4*)m;
 168     *p            = _fjsp_dtos_v2r4(x);
 169 }
 170
 171 /* Note that some single precision defines refer to the double precision helpers */
 172
 173 #endif /* GMX_SIMD_IMPL_SPARC64_HPC_ACE_SIMD_FLOAT_H */