/*
   This source code file is part of thread_mpi.
   Written by Sander Pronk, Erik Lindahl, and possibly others.

   Copyright (c) 2009, Sander Pronk, Erik Lindahl.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
   1) Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
   2) Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
   3) Neither the name of the copyright holders nor the
      names of its contributors may be used to endorse or promote products
      derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

   If you want to redistribute modifications, please consider that
   scientific software is very special. Version control is crucial -
   bugs must be traceable. We will be happy to consider code for
   inclusion in the official distribution, but derived work should not
   be called official thread_mpi. Details are found in the README & COPYING
   files.
 */

/* PowerPC using xlC inline assembly.
 * Recent versions of xlC (>=7.0) _partially_ support GCC inline assembly
 * if you use the option -qasm=gcc, but we have had to hack things a bit, in
 * particular when it comes to clobbered variables. Since this implementation
 * _could_ be buggy, we have separated it from the known-to-be-working gcc
 * inline assembly implementations.
 *
 * For now, we just disable the inline keyword if we're compiling C code:
 */
#if (!defined(__cplusplus)) && (!defined(inline))
#define inline_defined_in_atomic 1
#define inline
#endif

/* IBM xlC compiler */
#define TMPI_XLC_INTRINSICS

/* ppc has many memory synchronization instructions */
/*#define tMPI_Atomic_memory_barrier() { __fence(); __sync(); __fence();}*/
/*#define tMPI_Atomic_memory_barrier() __isync();*/
/*#define tMPI_Atomic_memory_barrier() __lwsync();*/

/* for normal memory, this should be enough: */
#define tMPI_Atomic_memory_barrier()     { __fence(); __eieio(); __fence(); }
#define tMPI_Atomic_memory_barrier_acq() { __eieio(); __fence(); }
#define tMPI_Atomic_memory_barrier_rel() { __fence(); __eieio(); }
#define TMPI_HAVE_ACQ_REL_BARRIERS

/*#define tMPI_Atomic_memory_barrier() __eieio();*/

typedef struct tMPI_Atomic
{
    volatile int value __attribute__ ((aligned(64)));
} tMPI_Atomic_t;

typedef struct tMPI_Atomic_ptr
{
    /* volatile char* volatile is not a bug, but means a volatile pointer
       to a volatile value. This is needed for older versions of xlc. */
    volatile char* volatile value __attribute__ ((aligned(64))); /*!< Volatile, to avoid compiler aliasing */
} tMPI_Atomic_ptr_t;

typedef struct tMPI_Spinlock
{
    volatile int lock __attribute__ ((aligned(64)));
} tMPI_Spinlock_t;

#define TMPI_ATOMIC_HAVE_NATIVE_SPINLOCK

#define tMPI_Atomic_get(a)         (int)((a)->value)
#define tMPI_Atomic_set(a, i)      (((a)->value) = (i))
#define tMPI_Atomic_ptr_get(a)     ((a)->value)
#define tMPI_Atomic_ptr_set(a, i)  (((a)->value) = (i))

#define TMPI_SPINLOCK_INITIALIZER  { 0 }

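/* Example (not part of the original thread_mpi API): a minimal sketch of how
 * the acquire/release barriers above pair with tMPI_Atomic_set/get for a
 * simple flag hand-off between two threads. The names tMPIX_publish_flag and
 * tMPIX_poll_flag are hypothetical, used only for this illustration. */
static inline void tMPIX_publish_flag(tMPI_Atomic_t *flag)
{
    /* writer: make all earlier stores visible before raising the flag */
    tMPI_Atomic_memory_barrier_rel();
    tMPI_Atomic_set(flag, 1);
}

static inline int tMPIX_poll_flag(tMPI_Atomic_t *flag)
{
    /* reader: later loads must not be reordered before the flag read */
    int ready = tMPI_Atomic_get(flag);
    tMPI_Atomic_memory_barrier_acq();
    return ready;
}
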
static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
{
#ifdef TMPI_XLC_INTRINSICS
    int ret;
    __fence(); /* this one needs to be here to avoid ptr. aliasing issues */
    ret = (__compare_and_swap(&(a->value), &oldval, newval));
    __fence(); /* and this one needs to be here to avoid aliasing issues */
    return ret;
#else
    int prev;
    __asm__ __volatile__ ("1: lwarx   %0,0,%2 \n"  /* load-reserve current value */
                          "\t cmpw    0,%0,%3 \n"  /* compare with oldval */
                          "\t bne     2f \n"       /* no match: leave loop */
                          "\t stwcx.  %4,0,%2 \n"  /* store-conditional newval */
                          "\t bne-    1b \n"       /* reservation lost: retry */
                          "\t sync \n"
                          "2: \n"
                          : "=&r" (prev), "=m" (a->value)
                          : "r" (&a->value), "r" (oldval), "r" (newval),
                          "m" (a->value));
    return prev == oldval;
#endif
}

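/* Example (not part of the original thread_mpi API): a sketch of the usual
 * retry loop built on tMPI_Atomic_cas, here storing the maximum of the
 * current value and 'b'. tMPIX_atomic_max is a hypothetical name used only
 * for illustration. */
static inline void tMPIX_atomic_max(tMPI_Atomic_t *a, int b)
{
    int old;
    do
    {
        old = tMPI_Atomic_get(a);
        if (old >= b)
        {
            return; /* already at least b; nothing to swap */
        }
    }
    while (!tMPI_Atomic_cas(a, old, b)); /* retry if another thread intervened */
}
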
static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void* oldval,
                                      void* newval)
{
    int ret;
    volatile char* volatile oldv = (char*)oldval;
    volatile char* volatile newv = (char*)newval;
    __fence(); /* this one needs to be here to avoid ptr. aliasing issues */
#if (!defined (__LP64__) ) && (!defined(__powerpc64__) )
    ret = __compare_and_swap((int *)&(a->value), (int*)&oldv, (int)newv);
#else
    ret = __compare_and_swaplp((long *)&(a->value), (long*)&oldv, (long)newv);
#endif
    __fence(); /* and this one, for the same reason */
    return ret;
}

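/* Example (not part of the original thread_mpi API): a sketch of a lock-free
 * stack push built on tMPI_Atomic_ptr_cas. The type tMPIX_node_t and the
 * function tMPIX_stack_push are hypothetical names used only here. */
typedef struct tMPIX_node
{
    struct tMPIX_node *next;
} tMPIX_node_t;

static inline void tMPIX_stack_push(tMPI_Atomic_ptr_t *head, tMPIX_node_t *n)
{
    void *old;
    do
    {
        old     = (void*)tMPI_Atomic_ptr_get(head); /* current top of stack */
        n->next = (tMPIX_node_t*)old;
    }
    while (!tMPI_Atomic_ptr_cas(head, old, (void*)n)); /* retry on contention */
}
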
static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
{
#ifdef TMPI_XLC_INTRINSICS
    int oldval, newval;
    do
    {
        __eieio(); /* these memory barriers are necessary */
        oldval = tMPI_Atomic_get(a);
        newval = oldval + i;
    }
    /*while(!__compare_and_swap( &(a->value), &oldval, newval));*/
    while (__check_lock_mp( (int*)&(a->value), oldval, newval));
    __eieio();
    return newval;
#else
    int t;
    __asm__ __volatile__("1: lwarx   %0,0,%2 \n"  /* load-reserve */
                         "\t add     %0,%1,%0 \n" /* add i */
                         "\t stwcx.  %0,0,%2 \n"  /* store-conditional */
                         "\t bne-    1b \n"       /* reservation lost: retry */
                         "\t isync \n"
                         : "=&r" (t)
                         : "r" (i), "r" (&a->value) );
    return t;
#endif
}
#define TMPI_ATOMIC_HAVE_NATIVE_ADD_RETURN

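/* Example (not part of the original thread_mpi API): tMPI_Atomic_add_return
 * gives back the value *after* the addition, which makes it convenient for
 * reference counting. tMPIX_release_ref is a hypothetical name used only here. */
static inline int tMPIX_release_ref(tMPI_Atomic_t *refcount)
{
    /* drop one reference; a return value of 1 means we held the last one */
    return tMPI_Atomic_add_return(refcount, -1) == 0;
}
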
static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
{
#ifdef TMPI_XLC_INTRINSICS
    int oldval, newval;
    do
    {
        __eieio(); /* these memory barriers are necessary */
        oldval = tMPI_Atomic_get(a);
        newval = oldval + i;
    }
    /*while(__check_lock_mp((const int*)&(a->value), oldval, newval));*/
    while (__check_lock_mp( (int*)&(a->value), oldval, newval));
    /*while(!__compare_and_swap( &(a->value), &oldval, newval));*/
    __eieio();
    return oldval; /* the value before the addition */
#else
    int t;
    __asm__ __volatile__("\t eieio\n"
                         "1: lwarx   %0,0,%2 \n"  /* load-reserve */
                         "\t add     %0,%1,%0 \n" /* add i */
                         "\t stwcx.  %0,0,%2 \n"  /* store-conditional */
                         "\t bne-    1b \n"       /* reservation lost: retry */
                         "\t isync \n"
                         : "=&r" (t)
                         : "r" (i), "r" (&a->value));
    return (t - i); /* t holds the new value; undo the add */
#endif
}
#define TMPI_ATOMIC_HAVE_NATIVE_FETCH_ADD

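/* Example (not part of the original thread_mpi API): tMPI_Atomic_fetch_add
 * returns the value *before* the addition, so it can hand out unique,
 * consecutive slots to many threads. tMPIX_claim_slot is a hypothetical name
 * used only here. */
static inline int tMPIX_claim_slot(tMPI_Atomic_t *next_slot)
{
    /* each caller gets a distinct index: 0, 1, 2, ... */
    return tMPI_Atomic_fetch_add(next_slot, 1);
}
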
static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
{
    __clear_lock_mp((const int*)x, 0); /* set the lock word to 0 (unlocked) */
}

static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
{
    /* spin until the 0 -> 1 transition succeeds */
    while (__check_lock_mp((int*)&(x->lock), 0, 1));
    tMPI_Atomic_memory_barrier_acq();
}

static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
{
    int ret;
    /* Return 0 if we got the lock */
    ret = __check_lock_mp((int*)&(x->lock), 0, 1);
    tMPI_Atomic_memory_barrier_acq();
    return ret;
}

static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
{
    tMPI_Atomic_memory_barrier_rel();
    __clear_lock_mp((int*)&(x->lock), 0);
}

static inline int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
{
    int ret = ((x->lock) != 0);
    tMPI_Atomic_memory_barrier_acq();
    return ret;
}

static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
{
    while (tMPI_Spinlock_islocked(x));
}

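/* Example (not part of the original thread_mpi API): a minimal sketch of
 * guarding a critical section with the spinlock primitives above. The names
 * tMPIX_example_lock, tMPIX_shared_counter and tMPIX_bump_counter are
 * hypothetical, used only for this illustration. */
static tMPI_Spinlock_t tMPIX_example_lock   = TMPI_SPINLOCK_INITIALIZER;
static int             tMPIX_shared_counter = 0;

static inline void tMPIX_bump_counter(void)
{
    tMPI_Spinlock_lock(&tMPIX_example_lock);   /* spins until acquired */
    tMPIX_shared_counter++;                    /* protected update */
    tMPI_Spinlock_unlock(&tMPIX_example_lock); /* release barrier + clear */
}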