src/gromacs/legacyheaders/thread_mpi/atomic/gcc_ia64.h

   1 /*
   2    This source code file is part of thread_mpi.
   3    Written by Sander Pronk, Erik Lindahl, and possibly others.
   4
   5    Copyright (c) 2009, Sander Pronk, Erik Lindahl.
   6    All rights reserved.
   7
   8    Redistribution and use in source and binary forms, with or without
   9    modification, are permitted provided that the following conditions are met:
  10    1) Redistributions of source code must retain the above copyright
  11    notice, this list of conditions and the following disclaimer.
  12    2) Redistributions in binary form must reproduce the above copyright
  13    notice, this list of conditions and the following disclaimer in the
  14    documentation and/or other materials provided with the distribution.
  15    3) Neither the name of the copyright holders nor the
  16    names of its contributors may be used to endorse or promote products
  17    derived from this software without specific prior written permission.
  18
  19    THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
  20    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  21    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22    DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
  23    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  24    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  25    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  26    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30    If you want to redistribute modifications, please consider that
  31    scientific software is very special. Version control is crucial -
  32    bugs must be traceable. We will be happy to consider code for
  33    inclusion in the official distribution, but derived work should not
  34    be called official thread_mpi. Details are found in the README & COPYING
  35    files.
  36  */
  37
  38 /* ia64 with GCC or Intel compilers. Since we need to define everything through
  39  * cmpxchg and fetchadd on ia64, we merge the different compilers and only
  40  * provide different implementations for that single function.
  41  * Documentation? Check the gcc/x86 section.
  42  */
  43
  44
  45 typedef struct tMPI_Atomic
  46 {
  47     volatile int value; /*!< Volatile, to avoid compiler aliasing */
  48 }
  49 tMPI_Atomic_t;
  50
  51 typedef struct tMPI_Atomic_ptr
  52 {
  53     void* volatile value; /*!< Volatile, to avoid compiler aliasing */
  54 }
  55 tMPI_Atomic_ptr_t;
  56
  57
  58 #define TMPI_SPINLOCK_INITIALIZER   { 0 }
  59
  60
  61 #define tMPI_Atomic_get(a)   ((a)->value)
  62 #define tMPI_Atomic_set(a, i)  (((a)->value) = (i))
  63
  64 #define tMPI_Atomic_ptr_get(a)   ((a)->value)
  65 #define tMPI_Atomic_ptr_set(a, i)  (((a)->value) = (i))
  66
  67
  68
  69 #ifndef __INTEL_COMPILER
  70 #define TMPI_HAVE_SWAP
  71 /* xchg operations: */
  72 /* ia64 xchg */
  73 static inline int tMPI_Atomic_swap(tMPI_Atomic_t *a, int b)
  74 {
  75     volatile int res;
  76     asm volatile ("xchg4 %0=[%1],%2" :
  77                   "=r" (res) : "r" (&a->value), "r" (b) : "memory");
  78
  79     return res;
  80 }
  81 /* ia64 ptr xchg */
  82 static inline void* tMPI_Atomic_ptr_swap(tMPI_Atomic_ptr_t * a, void *b)
  83 {
  84     void* volatile* res;
  85
  86
  87     asm volatile ("xchg8 %0=[%1],%2" :
  88                   "=r" (res) : "r" (&a->value), "r" (b) : "memory");
  89     return (void*)res;
  90 }
  91 #endif
  92
  93
  94
  95 /* do the intrinsics. icc on windows doesn't have them. */
  96 #if ( (TMPI_GCC_VERSION >= 40100) )
  97
  98 #include "gcc_intrinsics.h"
  99
 100 /* our spinlock is not really any better than gcc's based on its intrinsics */
 101 #include "gcc_spinlock.h"
 102 #else
 103
 104
 105 /* Compiler thingies */
 106 #ifdef __INTEL_COMPILER
 107 /* prototypes are neccessary for these intrisics: */
 108 #include <ia64intrin.h>
 109 void __memory_barrier(void);
 110 int _InterlockedCompareExchange(volatile int *dest, int xchg, int comp);
 111 /*void* _InterlockedCompareExchangePointer(void* volatile **dest, void* xchg,
 112                                          void* comp);*/
 113 unsigned __int64 __fetchadd4_rel(unsigned int *addend, const int increment);
 114 /* ia64 memory barrier */
 115 /*#define tMPI_Atomic_memory_barrier() __memory_barrier()*/
 116 #define tMPI_Atomic_memory_barrier() __sync_synchronize()
 117 /* ia64 cmpxchg */
 118 #define tMPI_Atomic_cas(a, oldval, newval) \
 119     (_InterlockedCompareExchange(&((a)->value), newval, oldval) == oldval)
 120 /* ia64 pointer cmpxchg */
 121 #define tMPI_Atomic_ptr_cas(a, oldval, newval) \
 122     (_InterlockedCompareExchangePointer(&((a)->value), newval, oldval) == oldval)
 123
 124 /*#define tMPI_Atomic_ptr_cas(a, oldval, newval) __sync_val_compare_and_swap(&((a)->value),newval,oldval)*/
 125
 126
 127 /* ia64 fetchadd, but it only works with increments +/- 1,4,8,16 */
 128 #define tMPI_ia64_fetchadd(a, inc)  __fetchadd4_rel(a, inc)
 129
 130 #define TMPI_HAVE_SWAP
 131 #define tMPI_Atomic_swap(a, b) _InterlockedExchange( &((a)->value), (b))
 132 #define tMPI_Atomic_ptr_swap(a, b) _InterlockedExchangePointer( &((a)->value), (b))
 133
 134 #elif defined __GNUC__
 135
 136 /* ia64 memory barrier */
 137 #define tMPI_Atomic_memory_barrier() asm volatile ("mf" ::: "memory")
 138
 139 /* ia64 cmpxchg */
 140 static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
 141 {
 142 #if GCC_VERSION < 40200
 143     volatile int res;
 144     asm volatile ("mov ar.ccv=%0;;" :: "rO" (oldval));
 145     asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv" :
 146                   "=r" (res) : "r" (&a->value), "r" (newval) : "memory");
 147
 148     return res == oldval;
 149 #else
 150     return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 151 #endif
 152 }
 153
 154 /* ia64 ptr cmpxchg */
 155 static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval,
 156                                       void *newval)
 157 {
 158 #if GCC_VERSION < 40200
 159     void* volatile* res;
 160     asm volatile ("mov ar.ccv=%0;;" :: "rO" (oldval));
 161     asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv" :
 162                   "=r" (res) : "r" (&a->value), "r" (newval) : "memory");
 163
 164     return ((void*)res) == oldval;
 165 #else
 166     return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 167 #endif
 168 }
 169
 170
 171 /* fetchadd, but on ia64 it only works with increments +/- 1,4,8,16 */
 172 #define tMPI_ia64_fetchadd(a, inc)                                             \
 173     ({  unsigned long res;                                                        \
 174         asm volatile ("fetchadd4.rel %0=[%1],%2"                                  \
 175                       : "=r" (res) : "r" (a), "r" (inc) : "memory");                \
 176         res;                                                        \
 177      })
 178
 179
 180
 181 #else  /* Unknown compiler */
 182 #  error Unknown ia64 compiler (not GCC or ICC) - modify tMPI_Thread.h!
 183 #endif /* end of gcc/icc specific section */
 184
 185
 186
 187
 188 static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
 189 {
 190     volatile int oldval, newval;
 191     volatile int __i = i;
 192
 193     /* Use fetchadd if, and only if, the increment value can be determined
 194      * at compile time (otherwise this check is optimized away) and it is
 195      * a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
 196      */
 197     if (__builtin_constant_p(i) &&
 198         ( (__i ==   1) || (__i ==   4)  || (__i ==   8) || (__i ==  16) ||
 199           (__i ==  -1) || (__i ==  -4)  || (__i ==  -8) || (__i == -16) ) )
 200     {
 201         oldval = tMPI_ia64_fetchadd((unsigned int*)&(a->value), __i);
 202         newval = oldval + i;
 203     }
 204     else
 205     {
 206         /* Use compare-exchange addition that works with any value */
 207         do
 208         {
 209             oldval = tMPI_Atomic_get(a);
 210             newval = oldval + i;
 211         }
 212         while (!tMPI_Atomic_cas(a, oldval, newval));
 213     }
 214     return (int)newval;
 215 }
 216
 217
 218
 219 static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
 220 {
 221     volatile int oldval, newval;
 222     volatile int __i = i;
 223
 224     /* Use ia64 fetchadd if, and only if, the increment value can be determined
 225      * at compile time (otherwise this check is optimized away) and it is
 226      * a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
 227      */
 228     if (__builtin_constant_p(i) &&
 229         ( (__i ==   1) || (__i ==   4)  || (__i ==   8) || (__i ==  16) ||
 230           (__i ==  -1) || (__i ==  -4)  || (__i ==  -8) || (__i == -16) ) )
 231     {
 232         oldval = tMPI_ia64_fetchadd((unsigned int*)&(a->value), __i);
 233         newval = oldval + i;
 234     }
 235     else
 236     {
 237         /* Use compare-exchange addition that works with any value */
 238         do
 239         {
 240             oldval = tMPI_Atomic_get(a);
 241             newval = oldval + i;
 242         }
 243         while (!tMPI_Atomic_cas(a, oldval, newval));
 244     }
 245     return (int)oldval;
 246 }
 247
 248 typedef struct tMPI_Spinlock
 249 {
 250     volatile unsigned int   lock; /*!< Volatile, to avoid compiler aliasing */
 251 }
 252 tMPI_Spinlock_t;
 253
 254
 255
 256 static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
 257 {
 258     x->lock = 0;
 259 }
 260
 261
 262 static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
 263 {
 264     tMPI_Atomic_t *a = (tMPI_Atomic_t *) x;
 265     int            succeeded;
 266     succeeded = tMPI_Atomic_cas(a, 0, 1);
 267     if (!succeeded)
 268     {
 269         do
 270         {
 271             while (a->value != 0)
 272             {
 273                 tMPI_Atomic_memory_barrier();
 274             }
 275             succeeded = tMPI_Atomic_cas(a, 0, 1);
 276         }
 277         while (!succeeded);
 278     }
 279 }
 280
 281
 282 static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
 283 {
 284     return (tMPI_Atomic_cas( ((tMPI_Atomic_t *)x), 0, 1));
 285 }
 286
 287
 288 static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
 289 {
 290     do
 291     {
 292         tMPI_Atomic_memory_barrier();
 293         x->lock = 0;
 294     }
 295     while (0);
 296 }
 297
 298
 299 static inline int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
 300 {
 301     return (x->lock != 0);
 302 }
 303
 304
 305 static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
 306 {
 307
 308     do
 309     {
 310         tMPI_Atomic_memory_barrier();
 311     }
 312     while (tMPI_Spinlock_islocked(x));
 313 }
 314
 315 #endif
 316
 317 #undef tMPI_ia64_fetchadd