src/external/thread_mpi/include/thread_mpi/atomic/gcc_ia64.h

   1 /*
   2    This source code file is part of thread_mpi.
   3    Written by Sander Pronk, Erik Lindahl, and possibly others.
   4
   5    Copyright (c) 2009, Sander Pronk, Erik Lindahl.
   6    All rights reserved.
   7
   8    Redistribution and use in source and binary forms, with or without
   9    modification, are permitted provided that the following conditions are met:
  10    1) Redistributions of source code must retain the above copyright
  11    notice, this list of conditions and the following disclaimer.
  12    2) Redistributions in binary form must reproduce the above copyright
  13    notice, this list of conditions and the following disclaimer in the
  14    documentation and/or other materials provided with the distribution.
  15    3) Neither the name of the copyright holders nor the
  16    names of its contributors may be used to endorse or promote products
  17    derived from this software without specific prior written permission.
  18
  19    THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
  20    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  21    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22    DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
  23    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  24    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  25    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  26    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  28    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30    If you want to redistribute modifications, please consider that
  31    scientific software is very special. Version control is crucial -
  32    bugs must be traceable. We will be happy to consider code for
  33    inclusion in the official distribution, but derived work should not
  34    be called official thread_mpi. Details are found in the README & COPYING
  35    files.
  36  */
  37
   /* ia64 with GCC or Intel compilers. Since everything on ia64 has to be
    * defined through cmpxchg and fetchadd, the different compilers are merged
    * here and only those low-level primitives get compiler-specific
    * implementations.
    * Documentation? Check the gcc/x86 section.
    */
  43
  44
  45 typedef struct tMPI_Atomic
  46 {
  47     volatile int value; /*!< Volatile, to avoid compiler aliasing */
  48 }
  49 tMPI_Atomic_t;
  50
  51 typedef struct tMPI_Atomic_ptr
  52 {
  53     void* volatile value; /*!< Volatile, to avoid compiler aliasing */
  54 }
  55 tMPI_Atomic_ptr_t;
  56
  57
  58
  59 #define tMPI_Atomic_get(a)   ((a)->value)
  60 #define tMPI_Atomic_set(a, i)  (((a)->value) = (i))
  61
  62 #define tMPI_Atomic_ptr_get(a)   ((a)->value)
  63 #define tMPI_Atomic_ptr_set(a, i)  (((a)->value) = (i))
  64
  65
  66
  67 #ifndef __INTEL_COMPILER
  68 #define TMPI_ATOMIC_HAVE_NATIVE_SWAP
  69 /* xchg operations: */
  70 /* ia64 xchg */
  71 static inline int tMPI_Atomic_swap(tMPI_Atomic_t *a, int b)
  72 {
  73     volatile int res;
  74     asm volatile ("xchg4 %0=[%1],%2" :
  75                   "=r" (res) : "r" (&a->value), "r" (b) : "memory");
  76
  77     return res;
  78 }
  79 /* ia64 ptr xchg */
  80 static inline void* tMPI_Atomic_ptr_swap(tMPI_Atomic_ptr_t * a, void *b)
  81 {
  82     void* volatile* res;
  83
  84
  85     asm volatile ("xchg8 %0=[%1],%2" :
  86                   "=r" (res) : "r" (&a->value), "r" (b) : "memory");
  87     return (void*)res;
  88 }
  89 #endif
  90
  91
  92
  93 /* do the intrinsics. icc on windows doesn't have them. */
  94 #if ( (TMPI_GCC_VERSION >= 40100) )
  95
  96 #include "gcc_intrinsics.h"
  97
  98 /* our spinlock is not really any better than gcc's based on its intrinsics */
  99 #include "gcc_spinlock.h"
 100 #else
 101
 102
 103 /* Compiler thingies */
 104 #ifdef __INTEL_COMPILER
  /* prototypes are necessary for these intrinsics: */
 106 #include <ia64intrin.h>
 107 void __memory_barrier(void);
 108 int _InterlockedCompareExchange(volatile int *dest, int xchg, int comp);
 109 /*void* _InterlockedCompareExchangePointer(void* volatile **dest, void* xchg,
 110                                          void* comp);*/
 111 unsigned __int64 __fetchadd4_rel(unsigned int *addend, const int increment);
 112 /* ia64 memory barrier */
 113 #define tMPI_Atomic_memory_barrier() __sync_synchronize()
 114 /* ia64 cmpxchg */
 115 #define tMPI_Atomic_cas(a, oldval, newval) \
 116     (_InterlockedCompareExchange(&((a)->value), newval, oldval) == oldval)
 117 /* ia64 pointer cmpxchg */
 118 #define tMPI_Atomic_ptr_cas(a, oldval, newval) \
 119     (_InterlockedCompareExchangePointer(&((a)->value), newval, oldval) == oldval)
 120
 121 /*#define tMPI_Atomic_ptr_cas(a, oldval, newval) __sync_val_compare_and_swap(&((a)->value),newval,oldval)*/
 122
 123
 124 /* ia64 fetchadd, but it only works with increments +/- 1,4,8,16 */
 125 #define tMPI_ia64_fetchadd(a, inc)  __fetchadd4_rel(a, inc)
 126
 127 #define tMPI_Atomic_swap(a, b) _InterlockedExchange( &((a)->value), (b))
 128 #define tMPI_Atomic_ptr_swap(a, b) _InterlockedExchangePointer( &((a)->value), (b))
 129 #define TMPI_ATOMIC_HAVE_NATIVE_SWAP
 130
 131 #elif defined __GNUC__
 132
 133 /* ia64 memory barrier */
 134 #define tMPI_Atomic_memory_barrier() asm volatile ("mf" ::: "memory")
 135
 136 /* ia64 cmpxchg */
 137 static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
 138 {
 139 #if GCC_VERSION < 40200
 140     volatile int res;
 141     asm volatile ("mov ar.ccv=%0;;" :: "rO" (oldval));
 142     asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv" :
 143                   "=r" (res) : "r" (&a->value), "r" (newval) : "memory");
 144
 145     return res == oldval;
 146 #else
 147     return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 148 #endif
 149 }
 150
 151 /* ia64 ptr cmpxchg */
 152 static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval,
 153                                       void *newval)
 154 {
 155 #if GCC_VERSION < 40200
 156     void* volatile* res;
 157     asm volatile ("mov ar.ccv=%0;;" :: "rO" (oldval));
 158     asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv" :
 159                   "=r" (res) : "r" (&a->value), "r" (newval) : "memory");
 160
 161     return ((void*)res) == oldval;
 162 #else
 163     return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 164 #endif
 165 }
 166
 167
 168 /* fetchadd, but on ia64 it only works with increments +/- 1,4,8,16 */
 169 #define tMPI_ia64_fetchadd(a, inc)                                             \
 170     ({  unsigned long res;                                                        \
 171         asm volatile ("fetchadd4.rel %0=[%1],%2"                                  \
 172                       : "=r" (res) : "r" (a), "r" (inc) : "memory");                \
 173         res;                                                        \
 174      })
 175
 176
 177
 178 #else  /* Unknown compiler */
 179 #  error Unknown ia64 compiler (not GCC or ICC) - modify tMPI_Thread.h!
 180 #endif /* end of gcc/icc specific section */
 181
 182
 183
 184
 185 static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
 186 {
 187     volatile int oldval, newval;
 188     volatile int __i = i;
 189
 190     /* Use fetchadd if, and only if, the increment value can be determined
 191      * at compile time (otherwise this check is optimized away) and it is
 192      * a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
 193      */
 194     if (__builtin_constant_p(i) &&
 195         ( (__i ==   1) || (__i ==   4)  || (__i ==   8) || (__i ==  16) ||
 196           (__i ==  -1) || (__i ==  -4)  || (__i ==  -8) || (__i == -16) ) )
 197     {
 198         oldval = tMPI_ia64_fetchadd((unsigned int*)&(a->value), __i);
 199         newval = oldval + i;
 200     }
 201     else
 202     {
 203         /* Use compare-exchange addition that works with any value */
 204         do
 205         {
 206             oldval = tMPI_Atomic_get(a);
 207             newval = oldval + i;
 208         }
 209         while (!tMPI_Atomic_cas(a, oldval, newval));
 210     }
 211     return (int)newval;
 212 }
 213 #define TMPI_ATOMIC_HAVE_NATIVE_ADD_RETURN
 214
 215
 216
 217 static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
 218 {
 219     volatile int oldval, newval;
 220     volatile int __i = i;
 221
 222     /* Use ia64 fetchadd if, and only if, the increment value can be determined
 223      * at compile time (otherwise this check is optimized away) and it is
 224      * a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
 225      */
 226     if (__builtin_constant_p(i) &&
 227         ( (__i ==   1) || (__i ==   4)  || (__i ==   8) || (__i ==  16) ||
 228           (__i ==  -1) || (__i ==  -4)  || (__i ==  -8) || (__i == -16) ) )
 229     {
 230         oldval = tMPI_ia64_fetchadd((unsigned int*)&(a->value), __i);
 231         newval = oldval + i;
 232     }
 233     else
 234     {
 235         /* Use compare-exchange addition that works with any value */
 236         do
 237         {
 238             oldval = tMPI_Atomic_get(a);
 239             newval = oldval + i;
 240         }
 241         while (!tMPI_Atomic_cas(a, oldval, newval));
 242     }
 243     return (int)oldval;
 244 }
 245 #define TMPI_ATOMIC_HAVE_NATIVE_FETCH_ADD
 246
 247 #endif
 248
 249 #undef tMPI_ia64_fetchadd