src/gromacs/simd/impl_ibm_vsx/impl_ibm_vsx_util_double.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 #ifndef GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
  37 #define GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H
  38
  39 #include "config.h"
  40
  41 #include "gromacs/utility/basedefinitions.h"
  42
  43 #include "impl_ibm_vsx_definitions.h"
  44 #include "impl_ibm_vsx_simd_double.h"
  45
  46 namespace gmx
  47 {
  48
  49 template <int align>
  50 static inline void gmx_simdcall
  51 gatherLoadTranspose(const double *        base,
  52                     const std::int32_t    offset[],
  53                     SimdDouble *          v0,
  54                     SimdDouble *          v1,
  55                     SimdDouble *          v2,
  56                     SimdDouble *          v3)
  57 {
  58     __vector double t1, t2, t3, t4;
  59
  60     t1                = *reinterpret_cast<const __vector double *>(base + align * offset[0]);
  61     t2                = *reinterpret_cast<const __vector double *>(base + align * offset[1]);
  62     t3                = *reinterpret_cast<const __vector double *>(base + align * offset[0] + 2);
  63     t4                = *reinterpret_cast<const __vector double *>(base + align * offset[1] + 2);
  64     v0->simdInternal_ = vec_mergeh(t1, t2);
  65     v1->simdInternal_ = vec_mergel(t1, t2);
  66     v2->simdInternal_ = vec_mergeh(t3, t4);
  67     v3->simdInternal_ = vec_mergel(t3, t4);
  68
  69 }
  70
  71 template <int align>
  72 static inline void gmx_simdcall
  73 gatherLoadTranspose(const double *        base,
  74                     const std::int32_t    offset[],
  75                     SimdDouble *          v0,
  76                     SimdDouble *          v1)
  77 {
  78     __vector double t1, t2;
  79
  80     t1                = *reinterpret_cast<const __vector double *>(base + align * offset[0]);
  81     t2                = *reinterpret_cast<const __vector double *>(base + align * offset[1]);
  82     v0->simdInternal_ = vec_mergeh(t1, t2);
  83     v1->simdInternal_ = vec_mergel(t1, t2);
  84
  85 }
  86
  87 static const int c_simdBestPairAlignmentDouble = 2;
  88
  89 template <int align>
  90 static inline void gmx_simdcall
  91 gatherLoadUTranspose(const double *        base,
  92                      const std::int32_t    offset[],
  93                      SimdDouble *          v0,
  94                      SimdDouble *          v1,
  95                      SimdDouble *          v2)
  96 {
  97     SimdDouble      t1, t2;
  98
  99     t1  = simdLoad(base + align * offset[0]);
 100     t2  = simdLoad(base + align * offset[1]);
 101
 102     v0->simdInternal_ = vec_mergeh(t1.simdInternal_, t2.simdInternal_);
 103     v1->simdInternal_ = vec_mergel(t1.simdInternal_, t2.simdInternal_);
 104     v2->simdInternal_ = vec_mergeh(vec_splats(*(base + align * offset[0] + 2)),
 105                                    vec_splats(*(base + align * offset[1] + 2)));
 106 }
 107
 108 // gcc-4.9 fails to recognize that the argument to vec_extract() is used
 109 template <int align>
 110 static inline void gmx_simdcall
 111 transposeScatterStoreU(double *              base,
 112                        const std::int32_t    offset[],
 113                        SimdDouble            v0,
 114                        SimdDouble            v1,
 115                        SimdDouble gmx_unused v2)
 116 {
 117     SimdDouble t1, t2;
 118
 119     t1.simdInternal_ = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 120     t2.simdInternal_ = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 121
 122     store(base + align * offset[0], t1);
 123     base[align * offset[0] + 2]  = vec_extract(v2.simdInternal_, 0);
 124     store(base + align * offset[1], t2);
 125     base[align * offset[1] + 2]  = vec_extract(v2.simdInternal_, 1);
 126 }
 127
 128 template <int align>
 129 static inline void gmx_simdcall
 130 transposeScatterIncrU(double *            base,
 131                       const std::int32_t  offset[],
 132                       SimdDouble          v0,
 133                       SimdDouble          v1,
 134                       SimdDouble          v2)
 135 {
 136     if (align % 4 == 0)
 137     {
 138         __vector double t1, t2, t3, t4;
 139         SimdDouble      t5, t6, t7, t8;
 140
 141         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 142         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 143         t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
 144         t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));
 145
 146         t5               = simdLoad(base + align * offset[0]);
 147         t6               = simdLoad(base + align * offset[0] + 2);
 148         t5.simdInternal_ = vec_add(t5.simdInternal_, t1);
 149         t6.simdInternal_ = vec_add(t6.simdInternal_, t3);
 150         store(base + align * offset[0], t5);
 151         store(base + align * offset[0] + 2, t6);
 152
 153         t5               = simdLoad(base + align * offset[1]);
 154         t6               = simdLoad(base + align * offset[1] + 2);
 155         t5.simdInternal_ = vec_add(t5.simdInternal_, t2);
 156         t6.simdInternal_ = vec_add(t6.simdInternal_, t4);
 157         store(base + align * offset[1], t5);
 158         store(base + align * offset[1] + 2, t6);
 159     }
 160     else
 161     {
 162         __vector double t1, t2;
 163         SimdDouble      t3, t4;
 164
 165         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 166         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 167
 168         t3 = simdLoad(base + align * offset[0]);
 169         t3.simdInternal_             = vec_add(t3.simdInternal_, t1);
 170         store(base + align * offset[0], t3);
 171         base[align * offset[0] + 2] += vec_extract(v2.simdInternal_, 0);
 172
 173         t4 = simdLoad(base + align * offset[1]);
 174         t4.simdInternal_             = vec_add(t4.simdInternal_, t2);
 175         store(base + align * offset[1], t4);
 176         base[align * offset[1] + 2] += vec_extract(v2.simdInternal_, 1);
 177     }
 178 }
 179
 180 template <int align>
 181 static inline void gmx_simdcall
 182 transposeScatterDecrU(double *            base,
 183                       const std::int32_t  offset[],
 184                       SimdDouble          v0,
 185                       SimdDouble          v1,
 186                       SimdDouble          v2)
 187 {
 188     if (align % 4 == 0)
 189     {
 190         __vector double t1, t2, t3, t4;
 191         SimdDouble      t5, t6, t7, t8;
 192
 193         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 194         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 195         t3 = vec_mergeh(v2.simdInternal_, vec_splats(0.0));
 196         t4 = vec_mergel(v2.simdInternal_, vec_splats(0.0));
 197
 198         t5               = simdLoad(base + align * offset[0]);
 199         t6               = simdLoad(base + align * offset[0] + 2);
 200         t5.simdInternal_ = vec_sub(t5.simdInternal_, t1);
 201         t6.simdInternal_ = vec_sub(t6.simdInternal_, t3);
 202         store(base + align * offset[0], t5);
 203         store(base + align * offset[0] + 2, t6);
 204
 205         t5               = simdLoad(base + align * offset[1]);
 206         t6               = simdLoad(base + align * offset[1] + 2);
 207         t5.simdInternal_ = vec_sub(t5.simdInternal_, t2);
 208         t6.simdInternal_ = vec_sub(t6.simdInternal_, t4);
 209         store(base + align * offset[1], t5);
 210         store(base + align * offset[1] + 2, t6);
 211     }
 212     else
 213     {
 214         __vector double t1, t2;
 215         SimdDouble      t3, t4;
 216
 217         t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 218         t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 219
 220         t3 = simdLoad(base + align * offset[0]);
 221         t3.simdInternal_             = vec_sub(t3.simdInternal_, t1);
 222         store(base + align * offset[0], t3);
 223         base[align * offset[0] + 2] -= vec_extract(v2.simdInternal_, 0);
 224
 225         t4 = simdLoad(base + align * offset[1]);
 226         t4.simdInternal_             = vec_sub(t4.simdInternal_, t2);
 227         store(base + align * offset[1], t4);
 228         base[align * offset[1] + 2] -= vec_extract(v2.simdInternal_, 1);
 229     }
 230 }
 231
 232 static inline void gmx_simdcall
 233 expandScalarsToTriplets(SimdDouble    scalar,
 234                         SimdDouble *  triplets0,
 235                         SimdDouble *  triplets1,
 236                         SimdDouble *  triplets2)
 237 {
 238     triplets0->simdInternal_ = vec_mergeh(scalar.simdInternal_, scalar.simdInternal_);
 239     triplets1->simdInternal_ = scalar.simdInternal_;
 240     triplets2->simdInternal_ = vec_mergel(scalar.simdInternal_, scalar.simdInternal_);
 241 }
 242
 243 template <int align>
 244 static inline void gmx_simdcall
 245 gatherLoadBySimdIntTranspose(const double *  base,
 246                              SimdDInt32      offset,
 247                              SimdDouble *    v0,
 248                              SimdDouble *    v1,
 249                              SimdDouble *    v2,
 250                              SimdDouble *    v3)
 251 {
 252     GMX_ALIGNED(std::int32_t, GMX_SIMD_DINT32_WIDTH) ioffset[GMX_SIMD_DINT32_WIDTH];
 253
 254     store(ioffset, offset );
 255     gatherLoadTranspose<align>(base, ioffset, v0, v1, v2, v3);
 256 }
 257
 258 template <int align>
 259 static inline void gmx_simdcall
 260 gatherLoadBySimdIntTranspose(const double *    base,
 261                              SimdDInt32        offset,
 262                              SimdDouble *      v0,
 263                              SimdDouble *      v1)
 264 {
 265     GMX_ALIGNED(std::int32_t, GMX_SIMD_DINT32_WIDTH) ioffset[GMX_SIMD_DINT32_WIDTH];
 266
 267     store(ioffset, offset );
 268     gatherLoadTranspose<align>(base, ioffset, v0, v1);
 269 }
 270
 271
 272 template <int align>
 273 static inline void gmx_simdcall
 274 gatherLoadUBySimdIntTranspose(const double *  base,
 275                               SimdDInt32      offset,
 276                               SimdDouble *    v0,
 277                               SimdDouble *    v1)
 278 {
 279     GMX_ALIGNED(std::int32_t, GMX_SIMD_DINT32_WIDTH) ioffset[GMX_SIMD_DINT32_WIDTH];
 280
 281     store(ioffset, offset );
 282     gatherLoadTranspose<align>(base, ioffset, v0, v1);
 283 }
 284
 285 static inline double gmx_simdcall
 286 reduceIncr4ReturnSum(double *    m,
 287                      SimdDouble  v0,
 288                      SimdDouble  v1,
 289                      SimdDouble  v2,
 290                      SimdDouble  v3)
 291 {
 292     __vector double t1, t2, t3, t4;
 293
 294     t1 = vec_mergeh(v0.simdInternal_, v1.simdInternal_);
 295     t2 = vec_mergel(v0.simdInternal_, v1.simdInternal_);
 296     t3 = vec_mergeh(v2.simdInternal_, v3.simdInternal_);
 297     t4 = vec_mergel(v2.simdInternal_, v3.simdInternal_);
 298
 299     t1 = vec_add(t1, t2);
 300     t3 = vec_add(t3, t4);
 301
 302     *reinterpret_cast<__vector double *>(m)   += t1;
 303     *reinterpret_cast<__vector double *>(m+2) += t3;
 304
 305     t1 = vec_add(t1, t3);
 306     return reduce(t1);
 307 }
 308
 309 }      // namespace gmx
 310
 311 #endif // GMX_SIMD_IMPLEMENTATION_IBM_VSX_UTIL_DOUBLE_H