2 This source code file is part of thread_mpi.
3 Written by Sander Pronk, Erik Lindahl, and possibly others.
5 Copyright (c) 2009, Sander Pronk, Erik Lindahl.
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10 1) Redistributions of source code must retain the above copyright
11 notice, this list of conditions and the following disclaimer.
12 2) Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15 3) Neither the name of the copyright holders nor the
16 names of its contributors may be used to endorse or promote products
17 derived from this software without specific prior written permission.
19 THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
20 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
23 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 If you want to redistribute modifications, please consider that
31 scientific software is very special. Version control is crucial -
32 bugs must be traceable. We will be happy to consider code for
33 inclusion in the official distribution, but derived work should not
34 be called official thread_mpi. Details are found in the README & COPYING
38 /* ia64 with GCC or Intel compilers. Since we need to define everything through
39 * cmpxchg and fetchadd on ia64, we merge the different compilers and only
40 * provide different implementations for that single function.
41 * Documentation? Check the gcc/x86 section.
/* Atomic integer container: wraps a single volatile int member, `value`,
 * which the accessor macros and atomic functions in this file operate on
 * through a pointer to the struct.
 * NOTE(review): the braces and the typedef name are absent from this
 * listing; the functions in this file refer to the type as tMPI_Atomic_t. */
45 typedef struct tMPI_Atomic
47 volatile int value; /*!< Volatile, to avoid compiler aliasing */
/* Atomic pointer container: wraps a single member, `value`, declared as a
 * volatile pointer to void (the pointer itself is volatile, not the pointee).
 * NOTE(review): the braces and the typedef name are absent from this
 * listing; the functions in this file refer to the type as
 * tMPI_Atomic_ptr_t. */
51 typedef struct tMPI_Atomic_ptr
53 void* volatile value; /*!< Volatile, to avoid compiler aliasing */
/* Static initializer for a spinlock object: a brace initializer that sets
 * the first member to 0. */
58 #define TMPI_SPINLOCK_INITIALIZER { 0 }
/* Read / write the `value` member of an atomic integer. `a` is a pointer to
 * the struct. Both macros expand to a plain member access or assignment; the
 * macros themselves issue no explicit memory barrier. The set macro yields
 * the value of the assignment expression. */
61 #define tMPI_Atomic_get(a) ((a)->value)
62 #define tMPI_Atomic_set(a, i) (((a)->value) = (i))
/* Same pair of accessors for the atomic pointer container; the expansions
 * are identical to the integer versions. */
64 #define tMPI_Atomic_ptr_get(a) ((a)->value)
65 #define tMPI_Atomic_ptr_set(a, i) (((a)->value) = (i))
/* Atomic exchange ("swap") primitives built on the ia64 xchg instruction.
 * This section is compiled only when __INTEL_COMPILER is not defined, and
 * it defines TMPI_HAVE_SWAP alongside the functions.
 * NOTE(review): the #endif closing this guard is not visible in this
 * listing - confirm where the guarded region ends. */
69 #ifndef __INTEL_COMPILER
70 #define TMPI_HAVE_SWAP
71 /* xchg operations: */
/* Exchange the int stored in a->value with b.
 *
 * The asm statement issues `xchg4`, with the address of a->value and b as
 * register inputs and `res` as the register output; on ia64 that instruction
 * stores the source register to memory and delivers the previous memory
 * contents in the destination register. The "memory" clobber tells the
 * compiler that memory is modified by the statement.
 *
 * NOTE(review): the declaration of `res` and the return statement are
 * absent from this listing; presumably the previous value is returned -
 * confirm. */
73 static inline int tMPI_Atomic_swap(tMPI_Atomic_t *a, int b)
76 asm volatile ("xchg4 %0=[%1],%2" :
77 "=r" (res) : "r" (&a->value), "r" (b) : "memory");
/* Exchange the pointer stored in a->value with b.
 *
 * Pointer-sized counterpart of tMPI_Atomic_swap: the asm statement issues
 * the 8-byte `xchg8` form, with the address of a->value and b as register
 * inputs and `res` as the register output, and declares a "memory" clobber.
 *
 * NOTE(review): the declaration of `res` and the return statement are
 * absent from this listing; presumably the previous pointer is returned -
 * confirm. */
82 static inline void* tMPI_Atomic_ptr_swap(tMPI_Atomic_ptr_t * a, void *b)
87 asm volatile ("xchg8 %0=[%1],%2" :
88 "=r" (res) : "r" (&a->value), "r" (b) : "memory");
95 /* Use the GCC intrinsics when TMPI_GCC_VERSION is at least 40100.
    * icc on windows doesn't have them.
    * NOTE(review): the #else / #endif belonging to this #if are not visible
    * in this listing; the hand-written definitions that follow are
    * presumably the fallback branch - confirm. */
96 #if ( (TMPI_GCC_VERSION >= 40100) )
98 #include "gcc_intrinsics.h"
100 /* our spinlock is not really any better than gcc's based on its intrinsics,
     * so the intrinsics-based spinlock header is used in this branch. */
101 #include "gcc_spinlock.h"
/* Intel compiler branch: declares the intrinsics used by the macros in this
 * branch and defines the memory barrier.
 *
 * The barrier macro expands to __sync_synchronize(); the alternative based
 * on __memory_barrier() is present only as commented-out code, although
 * __memory_barrier is still declared here.
 *
 * NOTE(review): the commented-out _InterlockedCompareExchangePointer
 * prototype has no visible terminator in this listing, so it is unclear
 * whether a prototype for that intrinsic is in effect - confirm against the
 * complete file. */
105 /* Compiler-specific definitions */
106 #ifdef __INTEL_COMPILER
107 /* prototypes are necessary for these intrinsics: */
108 #include <ia64intrin.h>
109 void __memory_barrier(void);
110 int _InterlockedCompareExchange(volatile int *dest, int xchg, int comp);
111 /*void* _InterlockedCompareExchangePointer(void* volatile **dest, void* xchg,
113 unsigned __int64 __fetchadd4_rel(unsigned int *addend, const int increment);
114 /* ia64 memory barrier */
115 /*#define tMPI_Atomic_memory_barrier() __memory_barrier()*/
116 #define tMPI_Atomic_memory_barrier() __sync_synchronize()
/* Intel compiler compare-and-swap for atomic ints.
 *
 * Passes the address of a->value, newval and oldval (in that order) to
 * _InterlockedCompareExchange and evaluates to non-zero when the intrinsic's
 * return value equals oldval.
 *
 * NOTE(review): `oldval` is expanded twice and neither `oldval` nor `newval`
 * is parenthesized in the expansion, so arguments with side effects or
 * low-precedence operators may misbehave - confirm that all callers pass
 * simple expressions. */
118 #define tMPI_Atomic_cas(a, oldval, newval) \
119 (_InterlockedCompareExchange(&((a)->value), newval, oldval) == oldval)
120 /* ia64 pointer cmpxchg: same pattern as the int version, using
     * _InterlockedCompareExchangePointer on the pointer member. The same
     * double-expansion / parenthesization caveat applies. */
121 #define tMPI_Atomic_ptr_cas(a, oldval, newval) \
122 (_InterlockedCompareExchangePointer(&((a)->value), newval, oldval) == oldval)
124 /*#define tMPI_Atomic_ptr_cas(a, oldval, newval) __sync_val_compare_and_swap(&((a)->value),newval,oldval)*/
127 /* ia64 fetchadd, but it only works with increments +/- 1,4,8,16.
     * Intel compiler version: forwards directly to the __fetchadd4_rel
     * intrinsic. `a` is the address of the value to modify and `inc` the
     * increment; the macro performs no check that `inc` is one of the
     * supported values, so callers must guarantee it. */
128 #define tMPI_ia64_fetchadd(a, inc) __fetchadd4_rel(a, inc)
/* Intel compiler swap operations: TMPI_HAVE_SWAP is defined and both swap
 * operations map onto the _InterlockedExchange / _InterlockedExchangePointer
 * intrinsics, applied to the address of the `value` member.
 * NOTE(review): no prototype for either intrinsic is visible in this file;
 * presumably they are declared by the intrinsics header included in this
 * branch - confirm. */
130 #define TMPI_HAVE_SWAP
131 #define tMPI_Atomic_swap(a, b) _InterlockedExchange( &((a)->value), (b))
132 #define tMPI_Atomic_ptr_swap(a, b) _InterlockedExchangePointer( &((a)->value), (b))
134 #elif defined __GNUC__
136 /* ia64 memory barrier (GCC branch): emits the `mf` memory-fence
     * instruction through inline asm. The statement has no operands; its
     * "memory" clobber additionally tells the compiler that memory may have
     * changed across the statement. */
137 #define tMPI_Atomic_memory_barrier() asm volatile ("mf" ::: "memory")
/* Compare-and-swap on an atomic int (GCC branch).
 *
 * Two implementations are selected by the preprocessor:
 *  - GCC_VERSION < 40200: inline asm. The first statement moves oldval into
 *    the ar.ccv application register; the second issues `cmpxchg4.acq` on
 *    the address of a->value with newval, receiving the instruction's result
 *    in `res`. The function then returns whether res equals oldval.
 *  - otherwise: returns __sync_bool_compare_and_swap(&a->value, oldval,
 *    newval).
 * Either way the result is non-zero when the swap took place.
 *
 * NOTE(review): the test uses GCC_VERSION, while the intrinsics guard
 * elsewhere in this file uses TMPI_GCC_VERSION. An undefined macro
 * evaluates to 0 in #if, which would always select the asm path - confirm
 * which macro is intended.
 * NOTE(review): ar.ccv is set in one asm statement and consumed in another;
 * GCC does not guarantee that separate asm statements stay adjacent -
 * consider whether they should be a single statement.
 * NOTE(review): the declaration of `res` and the #else / #endif lines are
 * absent from this listing. */
140 static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
142 #if GCC_VERSION < 40200
144 asm volatile ("mov ar.ccv=%0;;" :: "rO" (oldval));
145 asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv" :
146 "=r" (res) : "r" (&a->value), "r" (newval) : "memory");
148 return res == oldval;
150 return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
154 /* ia64 ptr cmpxchg: compare-and-swap on an atomic pointer (GCC branch).
     *
     * Mirrors the int version with the 8-byte instruction:
     *  - GCC_VERSION < 40200: moves oldval into ar.ccv, issues `cmpxchg8.acq`
     *    on the address of a->value with newval, and returns whether the
     *    result (cast to a void pointer) equals oldval.
     *  - otherwise: returns __sync_bool_compare_and_swap(&a->value, oldval,
     *    newval).
     *
     * NOTE(review): the test uses GCC_VERSION, while the intrinsics guard
     * elsewhere in this file uses TMPI_GCC_VERSION; an undefined macro
     * evaluates to 0 in #if - confirm which macro is intended.
     * NOTE(review): ar.ccv is set and consumed in two separate asm
     * statements, which GCC does not guarantee to keep adjacent.
     * NOTE(review): the end of the parameter list, the declaration of `res`
     * and the #else / #endif lines are absent from this listing. */
155 static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval,
158 #if GCC_VERSION < 40200
160 asm volatile ("mov ar.ccv=%0;;" :: "rO" (oldval));
161 asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv" :
162 "=r" (res) : "r" (&a->value), "r" (newval) : "memory");
164 return ((void*)res) == oldval;
166 return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
/* GCC version of the fetch-and-add helper, written as a statement
 * expression around the `fetchadd4.rel` instruction. `a` is the address to
 * modify and `inc` the increment; the instruction's result is received in
 * the local `res`.
 *
 * NOTE(review): `inc` is passed with an "r" (register) constraint, whereas
 * the ia64 fetchadd instruction takes its increment as an immediate -
 * confirm that this assembles as intended (an "i" constraint may be needed).
 * NOTE(review): the tail of the macro, which yields the result, is absent
 * from this listing. */
171 /* fetchadd, but on ia64 it only works with increments +/- 1,4,8,16 */
172 #define tMPI_ia64_fetchadd(a, inc) \
173 ({ unsigned long res; \
174 asm volatile ("fetchadd4.rel %0=[%1],%2" \
175 : "=r" (res) : "r" (a), "r" (inc) : "memory"); \
181 #else /* Unknown compiler: neither __INTEL_COMPILER nor __GNUC__ is defined, so stop the build */
182 # error Unknown ia64 compiler (not GCC or ICC) - modify tMPI_Thread.h!
183 #endif /* end of gcc/icc specific section */
/* Atomically add i to a->value.
 *
 * Two strategies are visible:
 *  1. If i is a compile-time constant equal to one of +/-1, 4, 8 or 16, the
 *     addition is done with tMPI_ia64_fetchadd on the address of a->value,
 *     and the value it yields is stored in `oldval`.
 *  2. Otherwise the current value is read with tMPI_Atomic_get and the
 *     update is retried with tMPI_Atomic_cas(a, oldval, newval) until the
 *     compare-and-swap succeeds.
 *
 * NOTE(review): the braces, the loop head, the computation of `newval` and
 * the return statements are absent from this listing; going by the name,
 * the function presumably returns the value after the addition - confirm.
 * NOTE(review): __builtin_constant_p is applied to a function parameter, so
 * it can only be true once the call has been inlined with a constant
 * argument; the comparisons then read the volatile copy `__i`.
 * NOTE(review): `__i` begins with a double underscore, which is an
 * identifier reserved for the implementation. */
188 static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
190 volatile int oldval, newval;
191 volatile int __i = i;
193 /* Use fetchadd if, and only if, the increment value can be determined
194 * at compile time (otherwise this check is optimized away) and it is
195 * a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
197 if (__builtin_constant_p(i) &&
198 ( (__i == 1) || (__i == 4) || (__i == 8) || (__i == 16) ||
199 (__i == -1) || (__i == -4) || (__i == -8) || (__i == -16) ) )
201 oldval = tMPI_ia64_fetchadd((unsigned int*)&(a->value), __i);
206 /* Use compare-exchange addition that works with any value */
209 oldval = tMPI_Atomic_get(a);
212 while (!tMPI_Atomic_cas(a, oldval, newval));
/* Atomically add i to a->value.
 *
 * Uses the same two strategies as tMPI_Atomic_add_return:
 *  1. If i is a compile-time constant equal to one of +/-1, 4, 8 or 16, the
 *     addition is done with tMPI_ia64_fetchadd on the address of a->value,
 *     and the value it yields is stored in `oldval`.
 *  2. Otherwise the current value is read with tMPI_Atomic_get and the
 *     update is retried with tMPI_Atomic_cas(a, oldval, newval) until the
 *     compare-and-swap succeeds.
 *
 * NOTE(review): the braces, the loop head, the computation of `newval` and
 * the return statements are absent from this listing; going by the name,
 * the function presumably returns the value held before the addition -
 * confirm.
 * NOTE(review): __builtin_constant_p is applied to a function parameter, so
 * it can only be true once the call has been inlined with a constant
 * argument; the comparisons then read the volatile copy `__i`.
 * NOTE(review): `__i` begins with a double underscore, which is an
 * identifier reserved for the implementation. */
219 static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
221 volatile int oldval, newval;
222 volatile int __i = i;
224 /* Use ia64 fetchadd if, and only if, the increment value can be determined
225 * at compile time (otherwise this check is optimized away) and it is
226 * a value supported by fetchadd (1,4,8,16,-1,-4,-8,-16).
228 if (__builtin_constant_p(i) &&
229 ( (__i == 1) || (__i == 4) || (__i == 8) || (__i == 16) ||
230 (__i == -1) || (__i == -4) || (__i == -8) || (__i == -16) ) )
232 oldval = tMPI_ia64_fetchadd((unsigned int*)&(a->value), __i);
237 /* Use compare-exchange addition that works with any value */
240 oldval = tMPI_Atomic_get(a);
243 while (!tMPI_Atomic_cas(a, oldval, newval));
/* Spinlock object: a single volatile unsigned int member, `lock`. The
 * spinlock functions in this file treat 0 as "unlocked" (they compare
 * against 0 and compare-and-swap 0 -> 1 to acquire).
 * NOTE(review): the braces and the typedef name are absent from this
 * listing; the functions in this file refer to the type as
 * tMPI_Spinlock_t. */
248 typedef struct tMPI_Spinlock
250 volatile unsigned int lock; /*!< Volatile, to avoid compiler aliasing */
/* Initialize the spinlock pointed to by x.
 * NOTE(review): the function body is absent from this listing; presumably
 * it sets x->lock to 0, the value used by TMPI_SPINLOCK_INITIALIZER -
 * confirm. */
256 static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
/* Acquire the spinlock pointed to by x, spinning until it is obtained.
 *
 * The spinlock is reinterpreted as a tMPI_Atomic_t so that tMPI_Atomic_cas
 * can be used on it: acquisition is a compare-and-swap of 0 -> 1. While the
 * lock word is observed non-zero the code issues a memory barrier and keeps
 * polling with plain reads, and only then retries the compare-and-swap.
 *
 * NOTE(review): the cast relies on tMPI_Spinlock_t and tMPI_Atomic_t having
 * the same layout; their members are declared as unsigned int and int
 * respectively.
 * NOTE(review): the declaration of `succeeded` and the conditions of the
 * enclosing if / loop are absent from this listing. */
262 static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
264 tMPI_Atomic_t *a = (tMPI_Atomic_t *) x;
266 succeeded = tMPI_Atomic_cas(a, 0, 1);
271 while (a->value != 0)
273 tMPI_Atomic_memory_barrier();
275 succeeded = tMPI_Atomic_cas(a, 0, 1);
/* Try once to acquire the spinlock without spinning.
 *
 * Performs a single compare-and-swap of 0 -> 1 on the lock (viewed as a
 * tMPI_Atomic_t) and returns its result unchanged, i.e. non-zero when the
 * swap succeeded and the lock was taken.
 *
 * NOTE(review): many trylock interfaces (for example pthread_mutex_trylock)
 * return 0 on success, the opposite polarity - confirm that callers and the
 * other platform implementations expect non-zero to mean "acquired". */
282 static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
284 return (tMPI_Atomic_cas( ((tMPI_Atomic_t *)x), 0, 1));
/* Release the spinlock pointed to by x.
 *
 * NOTE(review): only the memory barrier is visible in this listing. The
 * store that clears the lock word is absent, so its position relative to
 * the barrier cannot be checked here - confirm that the barrier is issued
 * before the releasing store. */
288 static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
292 tMPI_Atomic_memory_barrier();
/* Report whether the spinlock is currently held.
 * Returns 1 if x->lock is non-zero and 0 otherwise. This is a single read
 * of the volatile member with no barrier, so the answer may be stale by the
 * time the caller acts on it. */
299 static inline int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
301 return (x->lock != 0);
/* Wait until the spinlock is observed to be unlocked.
 *
 * Repeatedly issues a memory barrier and re-tests tMPI_Spinlock_islocked(x),
 * continuing while it reports the lock as held. No acquisition of the lock
 * is visible in this function.
 *
 * NOTE(review): the loop head and braces are absent from this listing; the
 * trailing `while (...);` suggests a do/while loop - confirm. */
305 static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
310 tMPI_Atomic_memory_barrier();
312 while (tMPI_Spinlock_islocked(x));
/* Remove the tMPI_ia64_fetchadd helper macro so that it is no longer
 * defined for code following this point. */
317 #undef tMPI_ia64_fetchadd