include/gmx_simd_ref.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2012, The GROMACS Development Team
   6  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   7  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   8  * others, as listed in the AUTHORS file in the top-level source
   9  * directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #ifndef _gmx_simd_ref_h_
  39 #define _gmx_simd_ref_h_
  40
  41 /* This file contains a reference plain-C implementation of arbitrary width.
  42  * This code is only useful for testing and documentation.
  43  * The SIMD width is set by defining GMX_SIMD_REF_WIDTH before including.
  44  */
  45
  46
  47 #ifndef GMX_SIMD_REF_WIDTH
  48 #error "GMX_SIMD_REF_WIDTH should be defined before including gmx_simd_ref.h"
  49 #endif
  50
  51 #include <math.h>
  52
  53 /* float/double SIMD register type */
  54 typedef struct {
  55     real r[GMX_SIMD_REF_WIDTH];
  56 } gmx_simd_ref_pr;
  57
  58 /* boolean SIMD register type */
  59 typedef struct {
  60     char r[GMX_SIMD_REF_WIDTH];
  61 } gmx_simd_ref_pb;
  62
  63 /* integer SIMD register type, only for table indexing and exclusion masks */
  64 typedef struct {
  65     int r[GMX_SIMD_REF_WIDTH];
  66 } gmx_simd_ref_epi32;
  67 #define GMX_SIMD_REF_EPI32_WIDTH  GMX_SIMD_REF_WIDTH
  68
  69 /* Load GMX_SIMD_REF_WIDTH reals for memory starting at r */
  70 static gmx_inline gmx_simd_ref_pr
  71 gmx_simd_ref_load_pr(const real *r)
  72 {
  73     gmx_simd_ref_pr a;
  74     int             i;
  75
  76     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
  77     {
  78         a.r[i] = r[i];
  79     }
  80
  81     return a;
  82 }
  83
  84 /* Set all SIMD register elements to *r */
  85 static gmx_inline gmx_simd_ref_pr
  86 gmx_simd_ref_load1_pr(const real *r)
  87 {
  88     gmx_simd_ref_pr a;
  89     int             i;
  90
  91     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
  92     {
  93         a.r[i] = *r;
  94     }
  95
  96     return a;
  97 }
  98
  99 /* Set all SIMD register elements to r */
 100 static gmx_inline gmx_simd_ref_pr
 101 gmx_simd_ref_set1_pr(real r)
 102 {
 103     gmx_simd_ref_pr a;
 104     int             i;
 105
 106     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 107     {
 108         a.r[i] = r;
 109     }
 110
 111     return a;
 112 }
 113
 114 /* Set all SIMD register elements to 0 */
 115 static gmx_inline gmx_simd_ref_pr
 116 gmx_simd_ref_setzero_pr()
 117 {
 118     gmx_simd_ref_pr a;
 119     int             i;
 120
 121     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 122     {
 123         a.r[i] = 0.0;
 124     }
 125
 126     return a;
 127 }
 128
 129 static gmx_inline void
 130 gmx_simd_ref_store_pr(real *dest, gmx_simd_ref_pr src)
 131 {
 132     int i;
 133
 134     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 135     {
 136         dest[i] = src.r[i];
 137     }
 138 }
 139
 140 static gmx_inline gmx_simd_ref_pr
 141 gmx_simd_ref_add_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 142 {
 143     gmx_simd_ref_pr c;
 144     int             i;
 145
 146     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 147     {
 148         c.r[i] = a.r[i] + b.r[i];
 149     }
 150
 151     return c;
 152 }
 153
 154 static gmx_inline gmx_simd_ref_pr
 155 gmx_simd_ref_sub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 156 {
 157     gmx_simd_ref_pr c;
 158     int             i;
 159
 160     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 161     {
 162         c.r[i] = a.r[i] - b.r[i];
 163     }
 164
 165     return c;
 166 }
 167
 168 static gmx_inline gmx_simd_ref_pr
 169 gmx_simd_ref_mul_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 170 {
 171     gmx_simd_ref_pr c;
 172     int             i;
 173
 174     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 175     {
 176         c.r[i] = a.r[i]*b.r[i];
 177     }
 178
 179     return c;
 180 }
 181
 182 static gmx_inline gmx_simd_ref_pr
 183 gmx_simd_ref_madd_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 184 {
 185     gmx_simd_ref_pr d;
 186     int             i;
 187
 188     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 189     {
 190         d.r[i] = a.r[i]*b.r[i] + c.r[i];
 191     }
 192
 193     return d;
 194 }
 195
 196 static gmx_inline gmx_simd_ref_pr
 197 gmx_simd_ref_nmsub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 198 {
 199     gmx_simd_ref_pr d;
 200     int             i;
 201
 202     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 203     {
 204         d.r[i] = -a.r[i]*b.r[i] + c.r[i];
 205     }
 206
 207     return d;
 208 }
 209
 210 static gmx_inline gmx_simd_ref_pr
 211 gmx_simd_ref_max_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 212 {
 213     gmx_simd_ref_pr c;
 214     int             i;
 215
 216     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 217     {
 218         c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
 219     }
 220
 221     return c;
 222 }
 223
 224 static gmx_inline gmx_simd_ref_pr
 225 gmx_simd_ref_blendzero_pr(gmx_simd_ref_pr a, gmx_simd_ref_pb b)
 226 {
 227     gmx_simd_ref_pr c;
 228     int             i;
 229
 230     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 231     {
 232         c.r[i] = (b.r[i] ? a.r[i] : 0.0);
 233     }
 234
 235     return c;
 236 }
 237
 238 /* Note that this reference implementation rounds away from zero,
 239  * whereas most SIMD intrinsics will round to nearest even. Since this
 240  * function is only used for periodic image calculations, the rounding
 241  * of mantissas close to 0.5 is irrelevant, except in testing. This
 242  * could be fixed by using rint/rintf, but the bigger problem is that
 243  * MSVC does not support full C99, and none of the round or rint
 244  * functions are defined. It's much easier to approximately implement
 245  * round() than rint(), so we do that and hope we never get bitten in
 246  * testing. (Thanks, Microsoft.)
 247  */
 248 static gmx_inline gmx_simd_ref_pr
 249 gmx_simd_ref_round_pr(gmx_simd_ref_pr a)
 250 {
 251     gmx_simd_ref_pr b;
 252     int             i;
 253
 254     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 255     {
 256 #ifdef _MSC_VER
 257         int temp = (a.r[i] >= 0.)
 258             ? (a.r[i] + 0.5)
 259             : (a.r[i] - 0.5);
 260         b.r[i] = (real) temp;
 261 #elif defined GMX_DOUBLE
 262         b.r[i] = round(a.r[i]);
 263 #else
 264         b.r[i] = roundf(a.r[i]);
 265 #endif
 266     }
 267
 268     return b;
 269 }
 270
 271 /* Not required, only used to speed up the nbnxn tabulated PME kernels */
 272 static gmx_inline gmx_simd_ref_pr
 273 gmx_simd_ref_floor_pr(gmx_simd_ref_pr a)
 274 {
 275     gmx_simd_ref_pr b;
 276     int             i;
 277
 278     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 279     {
 280 #ifdef GMX_DOUBLE
 281         b.r[i] = floor(a.r[i]);
 282 #else
 283         b.r[i] = floorf(a.r[i]);
 284 #endif
 285     }
 286
 287     return b;
 288 }
 289
 290 /* Not required, only used when blendv is faster than comparison */
 291 static gmx_inline gmx_simd_ref_pr
 292 gmx_simd_ref_blendv_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 293 {
 294     gmx_simd_ref_pr d;
 295     int             i;
 296
 297     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 298     {
 299         d.r[i] = (c.r[i] >= 0) ? a.r[i] : b.r[i];
 300     }
 301
 302     return d;
 303 }
 304
 305 /* Copy the sign of a to b, assumes b >= 0 for efficiency */
 306 static gmx_inline gmx_simd_ref_pr
 307 gmx_simd_ref_cpsgn_nonneg_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 308 {
 309     gmx_simd_ref_pr c;
 310     int             i;
 311
 312     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 313     {
 314         c.r[i] = (a.r[i] >= 0) ? b.r[i] : -b.r[i];
 315     }
 316
 317     return c;
 318 }
 319
 320 /* Very specific operation required in the non-bonded kernels */
 321 static gmx_inline gmx_simd_ref_pr
 322 gmx_simd_ref_masknot_add_pr(gmx_simd_ref_pb a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 323 {
 324     gmx_simd_ref_pr d;
 325     int             i;
 326
 327     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 328     {
 329         d.r[i] = a.r[i] ? b.r[i] : b.r[i] + c.r[i];
 330     }
 331
 332     return d;
 333 }
 334
 335 /* Comparison */
 336 static gmx_inline gmx_simd_ref_pb
 337 gmx_simd_ref_cmplt_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 338 {
 339     gmx_simd_ref_pb c;
 340     int             i;
 341
 342     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 343     {
 344         c.r[i] = (a.r[i] < b.r[i]);
 345     }
 346
 347     return c;
 348 }
 349
 350 /* Logical AND on SIMD booleans. Can't be static or it can't be a
 351    template parameter (at least on XLC for BlueGene/Q) */
 352 gmx_inline gmx_simd_ref_pb
 353 gmx_simd_ref_and_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
 354 {
 355     gmx_simd_ref_pb c;
 356     int             i;
 357
 358     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 359     {
 360         c.r[i] = (a.r[i] && b.r[i]);
 361     }
 362
 363     return c;
 364 }
 365
 366 /* Logical OR on SIMD booleans. Can't be static or it can't be a
 367    template parameter (at least on XLC for BlueGene/Q) */
 368 gmx_inline gmx_simd_ref_pb
 369 gmx_simd_ref_or_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
 370 {
 371     gmx_simd_ref_pb c;
 372     int             i;
 373
 374     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 375     {
 376         c.r[i] = (a.r[i] || b.r[i]);
 377     }
 378
 379     return c;
 380 }
 381
 382 /* Returns a single int (0/1) which tells if any of the booleans is True */
 383 static gmx_inline int
 384 gmx_simd_ref_anytrue_pb(gmx_simd_ref_pb a)
 385 {
 386     int anytrue;
 387     int i;
 388
 389     anytrue = 0;
 390     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 391     {
 392         if (a.r[i])
 393         {
 394             anytrue = 1;
 395         }
 396     }
 397
 398     return anytrue;
 399 }
 400
 401 /* Conversions only used for PME table lookup */
 402 static gmx_inline gmx_simd_ref_epi32
 403 gmx_simd_ref_cvttpr_epi32(gmx_simd_ref_pr a)
 404 {
 405     gmx_simd_ref_epi32 b;
 406     int                i;
 407
 408     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 409     {
 410         b.r[i] = (int)a.r[i];
 411     }
 412
 413     return b;
 414 };
 415
 416 /* These two function only need to be approximate, Newton-Raphson iteration
 417  * is used for full accuracy in gmx_invsqrt_pr and gmx_inv_pr.
 418  */
 419 static gmx_inline gmx_simd_ref_pr
 420 gmx_simd_ref_rsqrt_pr(gmx_simd_ref_pr a)
 421 {
 422     gmx_simd_ref_pr b;
 423     int             i;
 424
 425     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 426     {
 427 #ifdef GMX_DOUBLE
 428         b.r[i] = 1.0/sqrt(a.r[i]);
 429 #else
 430         b.r[i] = 1.0/sqrtf(a.r[i]);
 431 #endif
 432     }
 433
 434     return b;
 435 };
 436
 437 static gmx_inline gmx_simd_ref_pr
 438 gmx_simd_ref_rcp_pr(gmx_simd_ref_pr a)
 439 {
 440     gmx_simd_ref_pr b;
 441     int             i;
 442
 443     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 444     {
 445         b.r[i] = 1.0/a.r[i];
 446     }
 447
 448     return b;
 449 };
 450
 451 static gmx_inline gmx_simd_ref_pr
 452 gmx_simd_ref_exp_pr(gmx_simd_ref_pr a)
 453 {
 454     gmx_simd_ref_pr b;
 455     int             i;
 456
 457     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 458     {
 459 #ifdef GMX_DOUBLE
 460         b.r[i] = exp(a.r[i]);
 461 #else
 462         b.r[i] = expf(a.r[i]);
 463 #endif
 464     }
 465
 466     return b;
 467 };
 468
 469 static gmx_inline gmx_simd_ref_pr
 470 gmx_simd_ref_sqrt_pr(gmx_simd_ref_pr a)
 471 {
 472     gmx_simd_ref_pr b;
 473     int             i;
 474
 475     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 476     {
 477 #ifdef GMX_DOUBLE
 478         b.r[i] = sqrt(a.r[i]);
 479 #else
 480         b.r[i] = sqrtf(a.r[i]);
 481 #endif
 482     }
 483
 484     return b;
 485 }
 486
 487 static gmx_inline int
 488 gmx_simd_ref_sincos_pr(gmx_simd_ref_pr a,
 489                        gmx_simd_ref_pr *s, gmx_simd_ref_pr *c)
 490 {
 491     int i;
 492
 493     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 494     {
 495         s->r[i] = sin(a.r[i]);
 496         c->r[i] = cos(a.r[i]);
 497     }
 498
 499     return 0;
 500 }
 501
 502 static gmx_inline gmx_simd_ref_pr
 503 gmx_simd_ref_acos_pr(gmx_simd_ref_pr a)
 504 {
 505     gmx_simd_ref_pr b;
 506     int             i;
 507
 508     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 509     {
 510         b.r[i] = acos(a.r[i]);
 511     }
 512
 513     return b;
 514 }
 515
 516 static gmx_inline gmx_simd_ref_pr
 517 gmx_simd_ref_atan2_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 518 {
 519     gmx_simd_ref_pr c;
 520     int             i;
 521
 522     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 523     {
 524         c.r[i] = atan2(a.r[i], b.r[i]);
 525     }
 526
 527     return c;
 528 }
 529
 530 #endif /* _gmx_simd_ref_h_ */