include/gmx_simd_ref.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2012, The GROMACS Development Team
   6  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   7  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   8  * others, as listed in the AUTHORS file in the top-level source
   9  * directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #ifndef _gmx_simd_ref_h_
  39 #define _gmx_simd_ref_h_
  40
  41 /* This file contains a reference plain-C implementation of arbitrary width.
  42  * This code is only useful for testing and documentation.
  43  * The SIMD width is set by defining GMX_SIMD_REF_WIDTH before including.
  44  */
  45
  46
  47 #ifndef GMX_SIMD_REF_WIDTH
  48 #error "GMX_SIMD_REF_WIDTH should be defined before including gmx_simd_ref.h"
  49 #endif
  50
  51 #include <math.h>
  52
  53 /* float/double SIMD register type */
  54 typedef struct {
  55     real r[GMX_SIMD_REF_WIDTH];
  56 } gmx_simd_ref_pr;
  57
  58 /* boolean SIMD register type */
  59 typedef struct {
  60     char r[GMX_SIMD_REF_WIDTH];
  61 } gmx_simd_ref_pb;
  62
  63 /* integer SIMD register type, only for table indexing and exclusion masks */
  64 typedef struct {
  65     int r[GMX_SIMD_REF_WIDTH];
  66 } gmx_simd_ref_epi32;
  67 #define GMX_SIMD_REF_EPI32_WIDTH  GMX_SIMD_REF_WIDTH
  68
  69 /* Load GMX_SIMD_REF_WIDTH reals for memory starting at r */
  70 static gmx_inline gmx_simd_ref_pr
  71 gmx_simd_ref_load_pr(const real *r)
  72 {
  73     gmx_simd_ref_pr a;
  74     int             i;
  75
  76     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
  77     {
  78         a.r[i] = r[i];
  79     }
  80
  81     return a;
  82 }
  83
  84 /* Set all SIMD register elements to *r */
  85 static gmx_inline gmx_simd_ref_pr
  86 gmx_simd_ref_load1_pr(const real *r)
  87 {
  88     gmx_simd_ref_pr a;
  89     int             i;
  90
  91     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
  92     {
  93         a.r[i] = *r;
  94     }
  95
  96     return a;
  97 }
  98
  99 /* Set all SIMD register elements to r */
 100 static gmx_inline gmx_simd_ref_pr
 101 gmx_simd_ref_set1_pr(real r)
 102 {
 103     gmx_simd_ref_pr a;
 104     int             i;
 105
 106     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 107     {
 108         a.r[i] = r;
 109     }
 110
 111     return a;
 112 }
 113
 114 /* Set all SIMD register elements to 0 */
 115 static gmx_inline gmx_simd_ref_pr
 116 gmx_simd_ref_setzero_pr()
 117 {
 118     gmx_simd_ref_pr a;
 119     int             i;
 120
 121     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 122     {
 123         a.r[i] = 0.0;
 124     }
 125
 126     return a;
 127 }
 128
 129 static gmx_inline void
 130 gmx_simd_ref_store_pr(real *dest, gmx_simd_ref_pr src)
 131 {
 132     int i;
 133
 134     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 135     {
 136         dest[i] = src.r[i];
 137     }
 138 }
 139
 140 static gmx_inline gmx_simd_ref_pr
 141 gmx_simd_ref_add_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 142 {
 143     gmx_simd_ref_pr c;
 144     int             i;
 145
 146     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 147     {
 148         c.r[i] = a.r[i] + b.r[i];
 149     }
 150
 151     return c;
 152 }
 153
 154 static gmx_inline gmx_simd_ref_pr
 155 gmx_simd_ref_sub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 156 {
 157     gmx_simd_ref_pr c;
 158     int             i;
 159
 160     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 161     {
 162         c.r[i] = a.r[i] - b.r[i];
 163     }
 164
 165     return c;
 166 }
 167
 168 static gmx_inline gmx_simd_ref_pr
 169 gmx_simd_ref_mul_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 170 {
 171     gmx_simd_ref_pr c;
 172     int             i;
 173
 174     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 175     {
 176         c.r[i] = a.r[i]*b.r[i];
 177     }
 178
 179     return c;
 180 }
 181
 182 static gmx_inline gmx_simd_ref_pr
 183 gmx_simd_ref_madd_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 184 {
 185     gmx_simd_ref_pr d;
 186     int             i;
 187
 188     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 189     {
 190         d.r[i] = a.r[i]*b.r[i] + c.r[i];
 191     }
 192
 193     return d;
 194 }
 195
 196 static gmx_inline gmx_simd_ref_pr
 197 gmx_simd_ref_nmsub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 198 {
 199     gmx_simd_ref_pr d;
 200     int             i;
 201
 202     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 203     {
 204         d.r[i] = -a.r[i]*b.r[i] + c.r[i];
 205     }
 206
 207     return d;
 208 }
 209
 210 static gmx_inline gmx_simd_ref_pr
 211 gmx_simd_ref_max_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 212 {
 213     gmx_simd_ref_pr c;
 214     int             i;
 215
 216     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 217     {
 218         c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
 219     }
 220
 221     return c;
 222 }
 223
 224 static gmx_inline gmx_simd_ref_pr
 225 gmx_simd_ref_blendzero_pr(gmx_simd_ref_pr a, gmx_simd_ref_pb b)
 226 {
 227     gmx_simd_ref_pr c;
 228     int             i;
 229
 230     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 231     {
 232         c.r[i] = (b.r[i] ? a.r[i] : 0.0);
 233     }
 234
 235     return c;
 236 }
 237
 238 /* Note that this reference implementation rounds away from zero,
 239  * whereas most SIMD intrinsics will round to nearest even. Since this
 240  * function is only used for periodic image calculations, the rounding
 241  * of mantissas close to 0.5 is irrelevant, except in testing. This
 242  * could be fixed by using rint/rintf, but the bigger problem is that
 243  * MSVC does not support full C99, and none of the round or rint
 244  * functions are defined. It's much easier to approximately implement
 245  * round() than rint(), so we do that and hope we never get bitten in
 246  * testing. (Thanks, Microsoft.)
 247  */
 248 static gmx_inline gmx_simd_ref_pr
 249 gmx_simd_ref_round_pr(gmx_simd_ref_pr a)
 250 {
 251     gmx_simd_ref_pr b;
 252     int             i;
 253
 254     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 255     {
 256 #ifdef _MSC_VER
 257         int temp = (a.r[i] >= 0.)
 258             ? (a.r[i] + 0.5)
 259             : (a.r[i] - 0.5);
 260         b.r[i] = (real) temp;
 261 #elif defined GMX_DOUBLE
 262         b.r[i] = round(a.r[i]);
 263 #else
 264         b.r[i] = roundf(a.r[i]);
 265 #endif
 266     }
 267
 268     return b;
 269 }
 270
 271 /* Not required, only used to speed up the nbnxn tabulated PME kernels */
 272 static gmx_inline gmx_simd_ref_pr
 273 gmx_simd_ref_floor_pr(gmx_simd_ref_pr a)
 274 {
 275     gmx_simd_ref_pr b;
 276     int             i;
 277
 278     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 279     {
 280 #ifdef GMX_DOUBLE
 281         b.r[i] = floor(a.r[i]);
 282 #else
 283         b.r[i] = floorf(a.r[i]);
 284 #endif
 285     }
 286
 287     return b;
 288 }
 289
 290 /* Not required, only used when blendv is faster than comparison */
 291 static gmx_inline gmx_simd_ref_pr
 292 gmx_simd_ref_blendv_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 293 {
 294     gmx_simd_ref_pr d;
 295     int             i;
 296
 297     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 298     {
 299         d.r[i] = (c.r[i] >= 0) ? a.r[i] : b.r[i];
 300     }
 301
 302     return d;
 303 }
 304
 305 /* Copy the sign of a to b, assumes b >= 0 for efficiency */
 306 static gmx_inline gmx_simd_ref_pr
 307 gmx_simd_ref_cpsgn_nonneg_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 308 {
 309     gmx_simd_ref_pr c;
 310     int             i;
 311
 312     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 313     {
 314         c.r[i] = (a.r[i] >= 0) ? b.r[i] : -b.r[i];
 315     }
 316
 317     return c;
 318 }
 319
 320 /* Very specific operation required in the non-bonded kernels */
 321 static gmx_inline gmx_simd_ref_pr
 322 gmx_simd_ref_masknot_add_pr(gmx_simd_ref_pb a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 323 {
 324     gmx_simd_ref_pr d;
 325     int             i;
 326
 327     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 328     {
 329         d.r[i] = a.r[i] ? b.r[i] : b.r[i] + c.r[i];
 330     }
 331
 332     return d;
 333 }
 334
 335 /* Comparison */
 336 static gmx_inline gmx_simd_ref_pb
 337 gmx_simd_ref_cmplt_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 338 {
 339     gmx_simd_ref_pb c;
 340     int             i;
 341
 342     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 343     {
 344         c.r[i] = (a.r[i] < b.r[i]);
 345     }
 346
 347     return c;
 348 }
 349
 350 /* Logical AND on SIMD booleans */
 351 static gmx_inline gmx_simd_ref_pb
 352 gmx_simd_ref_and_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
 353 {
 354     gmx_simd_ref_pb c;
 355     int             i;
 356
 357     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 358     {
 359         c.r[i] = (a.r[i] && b.r[i]);
 360     }
 361
 362     return c;
 363 }
 364
 365 /* Logical OR on SIMD booleans */
 366 static gmx_inline gmx_simd_ref_pb
 367 gmx_simd_ref_or_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
 368 {
 369     gmx_simd_ref_pb c;
 370     int             i;
 371
 372     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 373     {
 374         c.r[i] = (a.r[i] || b.r[i]);
 375     }
 376
 377     return c;
 378 }
 379
 380 /* Not required, gmx_anytrue_pb(x) returns if any of the boolean is x is True.
 381  * If this is not present, define GMX_SIMD_IS_TRUE(real x),
 382  * which should return x==True, where True is True as defined in SIMD.
 383  */
 384 static gmx_inline int
 385 gmx_simd_ref_anytrue_pb(gmx_simd_ref_pb a)
 386 {
 387     int anytrue;
 388     int i;
 389
 390     anytrue = 0;
 391     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 392     {
 393         if (a.r[i])
 394         {
 395             anytrue = 1;
 396         }
 397     }
 398
 399     return anytrue;
 400 }
 401
 402 /* If we don't have gmx_anytrue_pb, we need to store gmx_mm_pb */
 403 static gmx_inline void
 404 gmx_simd_ref_store_pb(real *dest, gmx_simd_ref_pb src)
 405 {
 406     int i;
 407
 408     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 409     {
 410         dest[i] = src.r[i];
 411     }
 412 };
 413
 414 /* Conversions only used for PME table lookup */
 415 static gmx_inline gmx_simd_ref_epi32
 416 gmx_simd_ref_cvttpr_epi32(gmx_simd_ref_pr a)
 417 {
 418     gmx_simd_ref_epi32 b;
 419     int                i;
 420
 421     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 422     {
 423         b.r[i] = (int)a.r[i];
 424     }
 425
 426     return b;
 427 };
 428
 429 /* These two function only need to be approximate, Newton-Raphson iteration
 430  * is used for full accuracy in gmx_invsqrt_pr and gmx_inv_pr.
 431  */
 432 static gmx_inline gmx_simd_ref_pr
 433 gmx_simd_ref_rsqrt_pr(gmx_simd_ref_pr a)
 434 {
 435     gmx_simd_ref_pr b;
 436     int             i;
 437
 438     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 439     {
 440 #ifdef GMX_DOUBLE
 441         b.r[i] = 1.0/sqrt(a.r[i]);
 442 #else
 443         b.r[i] = 1.0/sqrtf(a.r[i]);
 444 #endif
 445     }
 446
 447     return b;
 448 };
 449
 450 static gmx_inline gmx_simd_ref_pr
 451 gmx_simd_ref_rcp_pr(gmx_simd_ref_pr a)
 452 {
 453     gmx_simd_ref_pr b;
 454     int             i;
 455
 456     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 457     {
 458         b.r[i] = 1.0/a.r[i];
 459     }
 460
 461     return b;
 462 };
 463
 464 static gmx_inline gmx_simd_ref_pr
 465 gmx_simd_ref_exp_pr(gmx_simd_ref_pr a)
 466 {
 467     gmx_simd_ref_pr b;
 468     int             i;
 469
 470     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 471     {
 472 #ifdef GMX_DOUBLE
 473         b.r[i] = exp(a.r[i]);
 474 #else
 475         b.r[i] = expf(a.r[i]);
 476 #endif
 477     }
 478
 479     return b;
 480 };
 481
 482 static gmx_inline gmx_simd_ref_pr
 483 gmx_simd_ref_sqrt_pr(gmx_simd_ref_pr a)
 484 {
 485     gmx_simd_ref_pr b;
 486     int             i;
 487
 488     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 489     {
 490 #ifdef GMX_DOUBLE
 491         b.r[i] = sqrt(a.r[i]);
 492 #else
 493         b.r[i] = sqrtf(a.r[i]);
 494 #endif
 495     }
 496
 497     return b;
 498 }
 499
 500 static gmx_inline int
 501 gmx_simd_ref_sincos_pr(gmx_simd_ref_pr a,
 502                        gmx_simd_ref_pr *s, gmx_simd_ref_pr *c)
 503 {
 504     int i;
 505
 506     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 507     {
 508         s->r[i] = sin(a.r[i]);
 509         c->r[i] = cos(a.r[i]);
 510     }
 511
 512     return 0;
 513 }
 514
 515 static gmx_inline gmx_simd_ref_pr
 516 gmx_simd_ref_acos_pr(gmx_simd_ref_pr a)
 517 {
 518     gmx_simd_ref_pr b;
 519     int             i;
 520
 521     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 522     {
 523         b.r[i] = acos(a.r[i]);
 524     }
 525
 526     return b;
 527 }
 528
 529 static gmx_inline gmx_simd_ref_pr
 530 gmx_simd_ref_atan2_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 531 {
 532     gmx_simd_ref_pr c;
 533     int             i;
 534
 535     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 536     {
 537         c.r[i] = atan2(a.r[i], b.r[i]);
 538     }
 539
 540     return c;
 541 }
 542
 543 #endif /* _gmx_simd_ref_h_ */