/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2012, The GROMACS Development Team
 * Copyright (c) 2012,2013, by the GROMACS development team, led by
 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
 * others, as listed in the AUTHORS file in the top-level source
 * directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
38 #ifndef _gmx_simd_ref_h_
39 #define _gmx_simd_ref_h_
41 /* This file contains a reference plain-C implementation of arbitrary width.
42 * This code is only useful for testing and documentation.
43 * The SIMD width is set by defining GMX_SIMD_REF_WIDTH before including.
47 #ifndef GMX_SIMD_REF_WIDTH
48 #error "GMX_SIMD_REF_WIDTH should be defined before including gmx_simd_ref.h"
53 /* float/double SIMD register type */
55 real r[GMX_SIMD_REF_WIDTH];
58 /* boolean SIMD register type */
60 char r[GMX_SIMD_REF_WIDTH];
63 /* integer SIMD register type, only for table indexing and exclusion masks */
65 int r[GMX_SIMD_REF_WIDTH];
67 #define GMX_SIMD_REF_EPI32_WIDTH GMX_SIMD_REF_WIDTH
69 /* Load GMX_SIMD_REF_WIDTH reals for memory starting at r */
70 static gmx_inline gmx_simd_ref_pr
71 gmx_simd_ref_load_pr(const real *r)
76 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
84 /* Set all SIMD register elements to *r */
85 static gmx_inline gmx_simd_ref_pr
86 gmx_simd_ref_load1_pr(const real *r)
91 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
99 /* Set all SIMD register elements to r */
100 static gmx_inline gmx_simd_ref_pr
101 gmx_simd_ref_set1_pr(real r)
106 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
114 /* Set all SIMD register elements to 0 */
115 static gmx_inline gmx_simd_ref_pr
116 gmx_simd_ref_setzero_pr()
121 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
129 static gmx_inline void
130 gmx_simd_ref_store_pr(real *dest, gmx_simd_ref_pr src)
134 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
140 static gmx_inline gmx_simd_ref_pr
141 gmx_simd_ref_add_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
146 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
148 c.r[i] = a.r[i] + b.r[i];
154 static gmx_inline gmx_simd_ref_pr
155 gmx_simd_ref_sub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
160 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
162 c.r[i] = a.r[i] - b.r[i];
168 static gmx_inline gmx_simd_ref_pr
169 gmx_simd_ref_mul_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
174 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
176 c.r[i] = a.r[i]*b.r[i];
182 static gmx_inline gmx_simd_ref_pr
183 gmx_simd_ref_madd_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
188 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
190 d.r[i] = a.r[i]*b.r[i] + c.r[i];
196 static gmx_inline gmx_simd_ref_pr
197 gmx_simd_ref_nmsub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
202 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
204 d.r[i] = -a.r[i]*b.r[i] + c.r[i];
210 static gmx_inline gmx_simd_ref_pr
211 gmx_simd_ref_max_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
216 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
218 c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
224 static gmx_inline gmx_simd_ref_pr
225 gmx_simd_ref_blendzero_pr(gmx_simd_ref_pr a, gmx_simd_ref_pb b)
230 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
232 c.r[i] = (b.r[i] ? a.r[i] : 0.0);
238 /* Note that this reference implementation rounds away from zero,
239 * whereas most SIMD intrinsics will round to nearest even.
240 * Since this function is only used for periodic image calculations,
241 * the rounding of mantissas close to 0.5 is irrelevant.
243 static gmx_inline gmx_simd_ref_pr
244 gmx_simd_ref_round_pr(gmx_simd_ref_pr a)
249 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
252 b.r[i] = round(a.r[i]);
254 b.r[i] = roundf(a.r[i]);
261 /* Not required, only used to speed up the nbnxn tabulated PME kernels */
262 static gmx_inline gmx_simd_ref_pr
263 gmx_simd_ref_floor_pr(gmx_simd_ref_pr a)
268 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
271 b.r[i] = floor(a.r[i]);
273 b.r[i] = floorf(a.r[i]);
280 /* Not required, only used when blendv is faster than comparison */
281 static gmx_inline gmx_simd_ref_pr
282 gmx_simd_ref_blendv_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
287 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
289 d.r[i] = (c.r[i] >= 0) ? a.r[i] : b.r[i];
295 /* Copy the sign of a to b, assumes b >= 0 for efficiency */
296 static gmx_inline gmx_simd_ref_pr
297 gmx_simd_ref_cpsgn_nonneg_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
302 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
304 c.r[i] = (a.r[i] >= 0) ? b.r[i] : -b.r[i];
310 /* Very specific operation required in the non-bonded kernels */
311 static gmx_inline gmx_simd_ref_pr
312 gmx_simd_ref_masknot_add_pr(gmx_simd_ref_pb a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
317 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
319 d.r[i] = a.r[i] ? b.r[i] : b.r[i] + c.r[i];
326 static gmx_inline gmx_simd_ref_pb
327 gmx_simd_ref_cmplt_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
332 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
334 c.r[i] = (a.r[i] < b.r[i]);
340 /* Logical AND on SIMD booleans */
341 static gmx_inline gmx_simd_ref_pb
342 gmx_simd_ref_and_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
347 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
349 c.r[i] = (a.r[i] && b.r[i]);
355 /* Logical OR on SIMD booleans */
356 static gmx_inline gmx_simd_ref_pb
357 gmx_simd_ref_or_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
362 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
364 c.r[i] = (a.r[i] || b.r[i]);
370 /* Not required, gmx_anytrue_pb(x) returns if any of the boolean is x is True.
371 * If this is not present, define GMX_SIMD_IS_TRUE(real x),
372 * which should return x==True, where True is True as defined in SIMD.
374 static gmx_inline int
375 gmx_simd_ref_anytrue_pb(gmx_simd_ref_pb a)
381 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
392 /* If we don't have gmx_anytrue_pb, we need to store gmx_mm_pb */
393 static gmx_inline void
394 gmx_simd_ref_store_pb(real *dest, gmx_simd_ref_pb src)
398 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
405 /* For topology exclusion pair checking we need: ((a & b) ? True : False)
406 * when we do a bit-wise and between a and b.
407 * When integer SIMD operations are present, we use gmx_checkbitmask_epi32(a, b)
408 * Otherwise we do all operations, except for the set1, in reals.
411 /* Integer set and cast are only used for nbnxn exclusion masks */
412 static gmx_inline gmx_simd_ref_epi32
413 gmx_simd_ref_set1_epi32(int src)
415 gmx_simd_ref_epi32 a;
418 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
426 static gmx_inline gmx_simd_ref_epi32
427 gmx_simd_ref_load_si(const int *src)
429 gmx_simd_ref_epi32 a;
432 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
440 /* If the same bit is set in both input masks, return TRUE, else FALSE.
441 * This function is only called with a single bit set in b.
443 static gmx_inline gmx_simd_ref_pb
444 gmx_simd_ref_checkbitmask_epi32(gmx_simd_ref_epi32 a, gmx_simd_ref_epi32 b)
449 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
451 c.r[i] = ((a.r[i] & b.r[i]) != 0);
458 /* Conversions only used for PME table lookup */
459 static gmx_inline gmx_simd_ref_epi32
460 gmx_simd_ref_cvttpr_epi32(gmx_simd_ref_pr a)
462 gmx_simd_ref_epi32 b;
465 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
467 b.r[i] = (int)a.r[i];
473 /* These two function only need to be approximate, Newton-Raphson iteration
474 * is used for full accuracy in gmx_invsqrt_pr and gmx_inv_pr.
476 static gmx_inline gmx_simd_ref_pr
477 gmx_simd_ref_rsqrt_pr(gmx_simd_ref_pr a)
482 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
485 b.r[i] = 1.0/sqrt(a.r[i]);
487 b.r[i] = 1.0/sqrtf(a.r[i]);
494 static gmx_inline gmx_simd_ref_pr
495 gmx_simd_ref_rcp_pr(gmx_simd_ref_pr a)
500 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
508 static gmx_inline gmx_simd_ref_pr
509 gmx_simd_ref_exp_pr(gmx_simd_ref_pr a)
514 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
517 b.r[i] = exp(a.r[i]);
519 b.r[i] = expf(a.r[i]);
526 static gmx_inline gmx_simd_ref_pr
527 gmx_simd_ref_sqrt_pr(gmx_simd_ref_pr a)
532 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
535 b.r[i] = sqrt(a.r[i]);
537 b.r[i] = sqrtf(a.r[i]);
544 static gmx_inline int
545 gmx_simd_ref_sincos_pr(gmx_simd_ref_pr a,
546 gmx_simd_ref_pr *s, gmx_simd_ref_pr *c)
550 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
552 s->r[i] = sin(a.r[i]);
553 c->r[i] = cos(a.r[i]);
559 static gmx_inline gmx_simd_ref_pr
560 gmx_simd_ref_acos_pr(gmx_simd_ref_pr a)
565 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
567 b.r[i] = acos(a.r[i]);
573 static gmx_inline gmx_simd_ref_pr
574 gmx_simd_ref_atan2_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
579 for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
581 c.r[i] = atan2(a.r[i], b.r[i]);
587 #endif /* _gmx_simd_ref_h_ */