src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_util_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
  38
  39 #include "config.h"
  40
  41 #include "gromacs/utility/basedefinitions.h"
  42
  43 #include "impl_ibm_vsx_definitions.h"
  44 #include "impl_ibm_vsx_simd_double.h"
  45
  46 namespace gmx
  47 {
  48
  49 template<int align>
  50 static inline void gmx_simdcall gatherLoadTranspose(const double*      base,
  51                                                     const std::int32_t offset[],
  52                                                     SimdDouble*        v0,
  53                                                     SimdDouble*        v1,
  54                                                     SimdDouble*        v2,
  55                                                     SimdDouble*        v3)
  56 {
  57     __vector double t1, t2, t3, t4;
  58
  59     t1                = *reinterpret_cast<const __vector double*>(base + align * offset[0]);
  60     t2                = *reinterpret_cast<const __vector double*>(base + align * offset[1]);
  61     t3                = *reinterpret_cast<const __vector double*>(base + align * offset[0] + 2);
  62     t4                = *reinterpret_cast<const __vector double*>(base + align * offset[1] + 2);
  63     v0->simdInternal_ = vec_mergeh(t1, t2);
  64     v1->simdInternal_ = vec_mergel(t1, t2);
  65     v2->simdInternal_ = vec_mergeh(t3, t4);
  66     v3->simdInternal_ = vec_mergel(t3, t4);
  67 }
  68
  69 template<int align>
  70 static inline void gmx_simdcall
  71                    gatherLoadTranspose(const double* base, const std::int32_t offset[], SimdDouble* v0, SimdDouble* v1)
  72 {
  73     __vector double t1, t2;
  74
  75     t1                = *reinterpret_cast<const __vector double*>(base + align * offset[0]);
  76     t2                = *reinterpret_cast<const __vector double*>(base + align * offset[1]);
  77     v0->simdInternal_ = vec_mergeh(t1, t2);
  78     v1->simdInternal_ = vec_mergel(t1, t2);
  79 }
  80
  81 static const int c_simdBestPairAlignmentDouble = 2;
  82
  83 template<int align>
  84 static inline void gmx_simdcall gatherLoadUTranspose(const double*      base,
  85                                                      const std::int32_t offset[],
  86                                                      SimdDouble*        v0,
  87                                                      SimdDouble*        v1,
  88                                                      SimdDouble*        v2)
  89 {
  90     SimdDouble t1, t2;
  91
  92     t1 = simdLoad(base + align * offset[0]);
  93     t2 = simdLoad(base + align * offset[1]);
  94
  95     v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
  96     v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
  97     v2->simdInternal_ = vec_mergeh(vec_splats(*(base + align * offset[0] + 2)),
  98                                    vec_splats(*(base + align * offset[1] + 2)));
  99 }
 100
 101 // gcc-4.9 fails to recognize that the argument to vec_extract() is used
 102 template<int align>
 103 static inline void gmx_simdcall transposeScatterStoreU(double*            base,
 104                                                        const std::int32_t offset[],
 105                                                        SimdDouble         v0,
 106                                                        SimdDouble         v1,
 107                                                        SimdDouble gmx_unused v2)
 108 {
 109     SimdDouble t1, t2;
 110
 111     t1.simdInternal_ = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 112     t2.simdInternal_ = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 113
 114     store(base + align * offset[0], t1);
 115     base[align * offset[0] + 2] = vec_extract(v2.simdInternal_, 0);
 116     store(base + align * offset[1], t2);
 117     base[align * offset[1] + 2] = vec_extract(v2.simdInternal_, 1);
 118 }
 119
 120 template<int align>
 121 static inline void gmx_simdcall
 122                    transposeScatterIncrU(double* base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
 123 {
 124     if (align % 4 == 0)
 125     {
 126         __vector double t1, t2, t3, t4;
 127         SimdDouble      t5, t6, t7, t8;
 128
 129         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 130         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 131         t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
 132         t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));
 133
 134         t5               = simdLoad(base + align * offset[0]);
 135         t6               = simdLoad(base + align * offset[0] + 2);
 136         t5.simdInternal_ = vec_add(t5.simdInternal_, t1);
 137         t6.simdInternal_ = vec_add(t6.simdInternal_, t3);
 138         store(base + align * offset[0], t5);
 139         store(base + align * offset[0] + 2, t6);
 140
 141         t5               = simdLoad(base + align * offset[1]);
 142         t6               = simdLoad(base + align * offset[1] + 2);
 143         t5.simdInternal_ = vec_add(t5.simdInternal_, t2);
 144         t6.simdInternal_ = vec_add(t6.simdInternal_, t4);
 145         store(base + align * offset[1], t5);
 146         store(base + align * offset[1] + 2, t6);
 147     }
 148     else
 149     {
 150         __vector double t1, t2;
 151         SimdDouble      t3, t4;
 152
 153         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 154         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 155
 156         t3               = simdLoad(base + align * offset[0]);
 157         t3.simdInternal_ = vec_add(t3.simdInternal_, t1);
 158         store(base + align * offset[0], t3);
 159         base[align * offset[0] + 2] += vec_extract(v2.simdInternal_, 0);
 160
 161         t4               = simdLoad(base + align * offset[1]);
 162         t4.simdInternal_ = vec_add(t4.simdInternal_, t2);
 163         store(base + align * offset[1], t4);
 164         base[align * offset[1] + 2] += vec_extract(v2.simdInternal_, 1);
 165     }
 166 }
 167
 168 template<int align>
 169 static inline void gmx_simdcall
 170                    transposeScatterDecrU(double* base, const std::int32_t offset[], SimdDouble v0, SimdDouble v1, SimdDouble v2)
 171 {
 172     if (align % 4 == 0)
 173     {
 174         __vector double t1, t2, t3, t4;
 175         SimdDouble      t5, t6, t7, t8;
 176
 177         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 178         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 179         t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
 180         t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));
 181
 182         t5               = simdLoad(base + align * offset[0]);
 183         t6               = simdLoad(base + align * offset[0] + 2);
 184         t5.simdInternal_ = vec_sub(t5.simdInternal_, t1);
 185         t6.simdInternal_ = vec_sub(t6.simdInternal_, t3);
 186         store(base + align * offset[0], t5);
 187         store(base + align * offset[0] + 2, t6);
 188
 189         t5               = simdLoad(base + align * offset[1]);
 190         t6               = simdLoad(base + align * offset[1] + 2);
 191         t5.simdInternal_ = vec_sub(t5.simdInternal_, t2);
 192         t6.simdInternal_ = vec_sub(t6.simdInternal_, t4);
 193         store(base + align * offset[1], t5);
 194         store(base + align * offset[1] + 2, t6);
 195     }
 196     else
 197     {
 198         __vector double t1, t2;
 199         SimdDouble      t3, t4;
 200
 201         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 202         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 203
 204         t3               = simdLoad(base + align * offset[0]);
 205         t3.simdInternal_ = vec_sub(t3.simdInternal_, t1);
 206         store(base + align * offset[0], t3);
 207         base[align * offset[0] + 2] -= vec_extract(v2.simdInternal_, 0);
 208
 209         t4               = simdLoad(base + align * offset[1]);
 210         t4.simdInternal_ = vec_sub(t4.simdInternal_, t2);
 211         store(base + align * offset[1], t4);
 212         base[align * offset[1] + 2] -= vec_extract(v2.simdInternal_, 1);
 213     }
 214 }
 215
 216 static inline void gmx_simdcall expandScalarsToTriplets(SimdDouble  scalar,
 217                                                         SimdDouble* triplets0,
 218                                                         SimdDouble* triplets1,
 219                                                         SimdDouble* triplets2)
 220 {
 221     triplets0->simdInternal_ = vec_mergeh(scalar.simdInternal_, scalar.simdInternal_);
 222     triplets1->simdInternal_ = scalar.simdInternal_;
 223     triplets2->simdInternal_ = vec_mergel(scalar.simdInternal_, scalar.simdInternal_);
 224 }
 225
 226 template<int align>
 227 static inline void gmx_simdcall gatherLoadBySimdIntTranspose(const double* base,
 228                                                              SimdDInt32    offset,
 229                                                              SimdDouble*   v0,
 230                                                              SimdDouble*   v1,
 231                                                              SimdDouble*   v2,
 232                                                              SimdDouble*   v3)
 233 {
 234     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];
 235
 236     store(ioffset, offset);
 237     gatherLoadTranspose<align>(base, ioffset, v0, v1, v2, v3);
 238 }
 239
 240 template<int align>
 241 static inline void gmx_simdcall
 242                    gatherLoadBySimdIntTranspose(const double* base, SimdDInt32 offset, SimdDouble* v0, SimdDouble* v1)
 243 {
 244     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];
 245
 246     store(ioffset, offset);
 247     gatherLoadTranspose<align>(base, ioffset, v0, v1);
 248 }
 249
 250
 251 template<int align>
 252 static inline void gmx_simdcall
 253                    gatherLoadUBySimdIntTranspose(const double* base, SimdDInt32 offset, SimdDouble* v0, SimdDouble* v1)
 254 {
 255     alignas(GMX_SIMD_ALIGNMENT) std::int32_t ioffset[GMX_SIMD_DINT32_WIDTH];
 256
 257     store(ioffset, offset);
 258
 259     SimdDouble t1     = simdLoadU(base + align * ioffset[0]);
 260     SimdDouble t2     = simdLoadU(base + align * ioffset[1]);
 261     v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
 262     v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
 263 }
 264
 265 static inline double gmx_simdcall
 266                      reduceIncr4ReturnSum(double* m, SimdDouble v0, SimdDouble v1, SimdDouble v2, SimdDouble v3)
 267 {
 268     __vector double t1, t2, t3, t4;
 269
 270     t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 271     t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 272     t3 = vec_mergeh(v2.simdInternal_, v3.simdInternal_);
 273     t4 = vec_mergel(v2.simdInternal_, v3.simdInternal_);
 274
 275     t1 = vec_add(t1, t2);
 276     t3 = vec_add(t3, t4);
 277
 278     *reinterpret_cast<__vector double*>(m) += t1;
 279     *reinterpret_cast<__vector double*>(m + 2) += t3;
 280
 281     t1 = vec_add(t1, t3);
 282     return reduce(t1);
 283 }
 284
 285 } // namespace gmx
 286
 287 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H