include/gmx_simd_ref.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2012, The GROMACS Development Team
   6  * Copyright (c) 2012,2013, by the GROMACS development team, led by
   7  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   8  * others, as listed in the AUTHORS file in the top-level source
   9  * directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37
  38 #ifndef _gmx_simd_ref_h_
  39 #define _gmx_simd_ref_h_
  40
  41 /* This file contains a reference plain-C implementation of arbitrary width.
  42  * This code is only useful for testing and documentation.
  43  * The SIMD width is set by defining GMX_SIMD_REF_WIDTH before including.
  44  */
  45
  46
  47 #ifndef GMX_SIMD_REF_WIDTH
  48 #error "GMX_SIMD_REF_WIDTH should be defined before including gmx_simd_ref.h"
  49 #endif
  50
  51 #include <math.h>
  52
  53 /* float/double SIMD register type */
  54 typedef struct {
  55     real r[GMX_SIMD_REF_WIDTH];
  56 } gmx_simd_ref_pr;
  57
  58 /* boolean SIMD register type */
  59 typedef struct {
  60     char r[GMX_SIMD_REF_WIDTH];
  61 } gmx_simd_ref_pb;
  62
  63 /* integer SIMD register type, only for table indexing and exclusion masks */
  64 typedef struct {
  65     int r[GMX_SIMD_REF_WIDTH];
  66 } gmx_simd_ref_epi32;
  67 #define GMX_SIMD_REF_EPI32_WIDTH  GMX_SIMD_REF_WIDTH
  68
  69 /* Load GMX_SIMD_REF_WIDTH reals for memory starting at r */
  70 static gmx_inline gmx_simd_ref_pr
  71 gmx_simd_ref_load_pr(const real *r)
  72 {
  73     gmx_simd_ref_pr a;
  74     int             i;
  75
  76     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
  77     {
  78         a.r[i] = r[i];
  79     }
  80
  81     return a;
  82 }
  83
  84 /* Set all SIMD register elements to *r */
  85 static gmx_inline gmx_simd_ref_pr
  86 gmx_simd_ref_load1_pr(const real *r)
  87 {
  88     gmx_simd_ref_pr a;
  89     int             i;
  90
  91     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
  92     {
  93         a.r[i] = *r;
  94     }
  95
  96     return a;
  97 }
  98
  99 /* Set all SIMD register elements to r */
 100 static gmx_inline gmx_simd_ref_pr
 101 gmx_simd_ref_set1_pr(real r)
 102 {
 103     gmx_simd_ref_pr a;
 104     int             i;
 105
 106     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 107     {
 108         a.r[i] = r;
 109     }
 110
 111     return a;
 112 }
 113
 114 /* Set all SIMD register elements to 0 */
 115 static gmx_inline gmx_simd_ref_pr
 116 gmx_simd_ref_setzero_pr()
 117 {
 118     gmx_simd_ref_pr a;
 119     int             i;
 120
 121     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 122     {
 123         a.r[i] = 0.0;
 124     }
 125
 126     return a;
 127 }
 128
 129 static gmx_inline void
 130 gmx_simd_ref_store_pr(real *dest, gmx_simd_ref_pr src)
 131 {
 132     int i;
 133
 134     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 135     {
 136         dest[i] = src.r[i];
 137     }
 138 }
 139
 140 static gmx_inline gmx_simd_ref_pr
 141 gmx_simd_ref_add_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 142 {
 143     gmx_simd_ref_pr c;
 144     int             i;
 145
 146     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 147     {
 148         c.r[i] = a.r[i] + b.r[i];
 149     }
 150
 151     return c;
 152 }
 153
 154 static gmx_inline gmx_simd_ref_pr
 155 gmx_simd_ref_sub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 156 {
 157     gmx_simd_ref_pr c;
 158     int             i;
 159
 160     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 161     {
 162         c.r[i] = a.r[i] - b.r[i];
 163     }
 164
 165     return c;
 166 }
 167
 168 static gmx_inline gmx_simd_ref_pr
 169 gmx_simd_ref_mul_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 170 {
 171     gmx_simd_ref_pr c;
 172     int             i;
 173
 174     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 175     {
 176         c.r[i] = a.r[i]*b.r[i];
 177     }
 178
 179     return c;
 180 }
 181
 182 static gmx_inline gmx_simd_ref_pr
 183 gmx_simd_ref_madd_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 184 {
 185     gmx_simd_ref_pr d;
 186     int             i;
 187
 188     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 189     {
 190         d.r[i] = a.r[i]*b.r[i] + c.r[i];
 191     }
 192
 193     return d;
 194 }
 195
 196 static gmx_inline gmx_simd_ref_pr
 197 gmx_simd_ref_nmsub_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 198 {
 199     gmx_simd_ref_pr d;
 200     int             i;
 201
 202     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 203     {
 204         d.r[i] = -a.r[i]*b.r[i] + c.r[i];
 205     }
 206
 207     return d;
 208 }
 209
 210 static gmx_inline gmx_simd_ref_pr
 211 gmx_simd_ref_max_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 212 {
 213     gmx_simd_ref_pr c;
 214     int             i;
 215
 216     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 217     {
 218         c.r[i] = (a.r[i] >= b.r[i] ? a.r[i] : b.r[i]);
 219     }
 220
 221     return c;
 222 }
 223
 224 static gmx_inline gmx_simd_ref_pr
 225 gmx_simd_ref_blendzero_pr(gmx_simd_ref_pr a, gmx_simd_ref_pb b)
 226 {
 227     gmx_simd_ref_pr c;
 228     int             i;
 229
 230     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 231     {
 232         c.r[i] = (b.r[i] ? a.r[i] : 0.0);
 233     }
 234
 235     return c;
 236 }
 237
 238 /* Note that this reference implementation rounds away from zero,
 239  * whereas most SIMD intrinsics will round to nearest even. Since this
 240  * function is only used for periodic image calculations, the rounding
 241  * of mantissas close to 0.5 is irrelevant, except in testing. This
 242  * could be fixed by using rint/rintf, but the bigger problem is that
 243  * MSVC does not support full C99, and none of the round or rint
 244  * functions are defined. It's much easier to approximately implement
 245  * round() than rint(), so we do that and hope we never get bitten in
 246  * testing. (Thanks, Microsoft.)
 247  */
 248 static gmx_inline gmx_simd_ref_pr
 249 gmx_simd_ref_round_pr(gmx_simd_ref_pr a)
 250 {
 251     gmx_simd_ref_pr b;
 252     int             i;
 253
 254     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 255     {
 256 #ifdef _MSC_VER
 257         int temp = (a.r[i] >= 0.)
 258             ? (a.r[i] + 0.5)
 259             : (a.r[i] - 0.5);
 260         b.r[i] = (real) temp;
 261 #elif defined GMX_DOUBLE
 262         b.r[i] = round(a.r[i]);
 263 #else
 264         b.r[i] = roundf(a.r[i]);
 265 #endif
 266     }
 267
 268     return b;
 269 }
 270
 271 /* Not required, only used to speed up the nbnxn tabulated PME kernels */
 272 static gmx_inline gmx_simd_ref_pr
 273 gmx_simd_ref_floor_pr(gmx_simd_ref_pr a)
 274 {
 275     gmx_simd_ref_pr b;
 276     int             i;
 277
 278     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 279     {
 280 #ifdef GMX_DOUBLE
 281         b.r[i] = floor(a.r[i]);
 282 #else
 283         b.r[i] = floorf(a.r[i]);
 284 #endif
 285     }
 286
 287     return b;
 288 }
 289
 290 /* Not required, only used when blendv is faster than comparison */
 291 static gmx_inline gmx_simd_ref_pr
 292 gmx_simd_ref_blendv_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 293 {
 294     gmx_simd_ref_pr d;
 295     int             i;
 296
 297     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 298     {
 299         d.r[i] = (c.r[i] >= 0) ? a.r[i] : b.r[i];
 300     }
 301
 302     return d;
 303 }
 304
 305 /* Copy the sign of a to b, assumes b >= 0 for efficiency */
 306 static gmx_inline gmx_simd_ref_pr
 307 gmx_simd_ref_cpsgn_nonneg_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 308 {
 309     gmx_simd_ref_pr c;
 310     int             i;
 311
 312     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 313     {
 314         c.r[i] = (a.r[i] >= 0) ? b.r[i] : -b.r[i];
 315     }
 316
 317     return c;
 318 }
 319
 320 /* Very specific operation required in the non-bonded kernels */
 321 static gmx_inline gmx_simd_ref_pr
 322 gmx_simd_ref_masknot_add_pr(gmx_simd_ref_pb a, gmx_simd_ref_pr b, gmx_simd_ref_pr c)
 323 {
 324     gmx_simd_ref_pr d;
 325     int             i;
 326
 327     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 328     {
 329         d.r[i] = a.r[i] ? b.r[i] : b.r[i] + c.r[i];
 330     }
 331
 332     return d;
 333 }
 334
 335 /* Comparison */
 336 static gmx_inline gmx_simd_ref_pb
 337 gmx_simd_ref_cmplt_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 338 {
 339     gmx_simd_ref_pb c;
 340     int             i;
 341
 342     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 343     {
 344         c.r[i] = (a.r[i] < b.r[i]);
 345     }
 346
 347     return c;
 348 }
 349
 350 /* Logical AND on SIMD booleans */
 351 static gmx_inline gmx_simd_ref_pb
 352 gmx_simd_ref_and_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
 353 {
 354     gmx_simd_ref_pb c;
 355     int             i;
 356
 357     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 358     {
 359         c.r[i] = (a.r[i] && b.r[i]);
 360     }
 361
 362     return c;
 363 }
 364
 365 /* Logical OR on SIMD booleans */
 366 static gmx_inline gmx_simd_ref_pb
 367 gmx_simd_ref_or_pb(gmx_simd_ref_pb a, gmx_simd_ref_pb b)
 368 {
 369     gmx_simd_ref_pb c;
 370     int             i;
 371
 372     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 373     {
 374         c.r[i] = (a.r[i] || b.r[i]);
 375     }
 376
 377     return c;
 378 }
 379
 380 /* Returns a single int (0/1) which tells if any of the booleans is True */
 381 static gmx_inline int
 382 gmx_simd_ref_anytrue_pb(gmx_simd_ref_pb a)
 383 {
 384     int anytrue;
 385     int i;
 386
 387     anytrue = 0;
 388     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 389     {
 390         if (a.r[i])
 391         {
 392             anytrue = 1;
 393         }
 394     }
 395
 396     return anytrue;
 397 }
 398
 399 /* Conversions only used for PME table lookup */
 400 static gmx_inline gmx_simd_ref_epi32
 401 gmx_simd_ref_cvttpr_epi32(gmx_simd_ref_pr a)
 402 {
 403     gmx_simd_ref_epi32 b;
 404     int                i;
 405
 406     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 407     {
 408         b.r[i] = (int)a.r[i];
 409     }
 410
 411     return b;
 412 };
 413
 414 /* These two function only need to be approximate, Newton-Raphson iteration
 415  * is used for full accuracy in gmx_invsqrt_pr and gmx_inv_pr.
 416  */
 417 static gmx_inline gmx_simd_ref_pr
 418 gmx_simd_ref_rsqrt_pr(gmx_simd_ref_pr a)
 419 {
 420     gmx_simd_ref_pr b;
 421     int             i;
 422
 423     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 424     {
 425 #ifdef GMX_DOUBLE
 426         b.r[i] = 1.0/sqrt(a.r[i]);
 427 #else
 428         b.r[i] = 1.0/sqrtf(a.r[i]);
 429 #endif
 430     }
 431
 432     return b;
 433 };
 434
 435 static gmx_inline gmx_simd_ref_pr
 436 gmx_simd_ref_rcp_pr(gmx_simd_ref_pr a)
 437 {
 438     gmx_simd_ref_pr b;
 439     int             i;
 440
 441     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 442     {
 443         b.r[i] = 1.0/a.r[i];
 444     }
 445
 446     return b;
 447 };
 448
 449 static gmx_inline gmx_simd_ref_pr
 450 gmx_simd_ref_exp_pr(gmx_simd_ref_pr a)
 451 {
 452     gmx_simd_ref_pr b;
 453     int             i;
 454
 455     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 456     {
 457 #ifdef GMX_DOUBLE
 458         b.r[i] = exp(a.r[i]);
 459 #else
 460         b.r[i] = expf(a.r[i]);
 461 #endif
 462     }
 463
 464     return b;
 465 };
 466
 467 static gmx_inline gmx_simd_ref_pr
 468 gmx_simd_ref_sqrt_pr(gmx_simd_ref_pr a)
 469 {
 470     gmx_simd_ref_pr b;
 471     int             i;
 472
 473     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 474     {
 475 #ifdef GMX_DOUBLE
 476         b.r[i] = sqrt(a.r[i]);
 477 #else
 478         b.r[i] = sqrtf(a.r[i]);
 479 #endif
 480     }
 481
 482     return b;
 483 }
 484
 485 static gmx_inline int
 486 gmx_simd_ref_sincos_pr(gmx_simd_ref_pr a,
 487                        gmx_simd_ref_pr *s, gmx_simd_ref_pr *c)
 488 {
 489     int i;
 490
 491     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 492     {
 493         s->r[i] = sin(a.r[i]);
 494         c->r[i] = cos(a.r[i]);
 495     }
 496
 497     return 0;
 498 }
 499
 500 static gmx_inline gmx_simd_ref_pr
 501 gmx_simd_ref_acos_pr(gmx_simd_ref_pr a)
 502 {
 503     gmx_simd_ref_pr b;
 504     int             i;
 505
 506     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 507     {
 508         b.r[i] = acos(a.r[i]);
 509     }
 510
 511     return b;
 512 }
 513
 514 static gmx_inline gmx_simd_ref_pr
 515 gmx_simd_ref_atan2_pr(gmx_simd_ref_pr a, gmx_simd_ref_pr b)
 516 {
 517     gmx_simd_ref_pr c;
 518     int             i;
 519
 520     for (i = 0; i < GMX_SIMD_REF_WIDTH; i++)
 521     {
 522         c.r[i] = atan2(a.r[i], b.r[i]);
 523     }
 524
 525     return c;
 526 }
 527
 528 #endif /* _gmx_simd_ref_h_ */