#elif defined(__xlC__) && defined (_AIX)
/* IBM xlC compiler on AIX */
-#include "atomic/xlc_aix.h"
+#include "atomic/xlc_ppc.h"
#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
 * The \a old_val value is compared with the value in memory in the atomic
 * datatype. If they are identical, the atomic type is swapped with the new
 * value, and otherwise left unchanged.
+ *
+ * This is *the* synchronization primitive: it has a consensus number of
+ * infinity, and is available in some form on all modern CPU architectures.
+ * In the words of Herlihy & Shavit (The Art of Multiprocessor Programming),
+ * it is the 'king of all wild things'.
*
- * This is a very useful synchronization primitive: You can start by reading
- * a value (without locking anything), perform some calculations, and then
+ * In practice, use it as follows: start by reading a value
+ * (without locking anything), perform some calculations, and then
 * atomically try to update it in memory unless it has changed. If it has
 * changed, the call returns false: re-read the value and repeat the
 * calculations in that case.
* \param new_val New value to write to the atomic type if it currently is
* identical to the old value.
*
- * \return The value of the atomic memory variable in memory when this
- * instruction was executed. This, if the operation succeeded the
- * return value was identical to the \a old parameter, and if not
- * it returns the updated value in memory so you can repeat your
- * operations on it.
- *
+ * \return True (1) if the swap occurred, i.e. if the value in \a a was
+ *         equal to \a old_val; false (0) if the swap did not occur because
+ *         the value was not equal to \a old_val.
+ *
 * \note The exchange occurred if the return value is true (1).
*/
static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int old_val, int new_val)
{
- int t;
+ int t=0;
tMPI_Thread_mutex_lock(&tMPI_Atomic_mutex);
- t=old_val;
if (a->value == old_val)
{
a->value = new_val;
+ t=1;
}
tMPI_Thread_mutex_unlock(&tMPI_Atomic_mutex);
return t;
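
The boolean return makes the canonical read-compute-CAS retry loop read
naturally. A minimal sketch of the pattern the documentation above describes
(the function name and the +1 update are illustrative; tMPI_Atomic_get and
tMPI_Atomic_cas are the calls defined in these headers):

static int example_atomic_increment(tMPI_Atomic_t *a)
{
    int oldval, newval;
    do
    {
        oldval = tMPI_Atomic_get(a);  /* read without taking a lock */
        newval = oldval + 1;          /* compute the new value */
    }
    /* false means another thread changed the value: re-read and retry */
    while (!tMPI_Atomic_cas(a, oldval, newval));
    return newval;
}
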
* and otherwise left unchanged.
*
* This is essential for implementing wait-free lists and other data
- * structures.
+ * structures. See 'tMPI_Atomic_cas()'.
*
* \param a Atomic datatype ('memory' value)
* \param old_val Pointer value read from the atomic type at an earlier point
* \param new_val New value to write to the atomic type if it currently is
* identical to the old value.
*
- * \return The value of the atomic pointer in memory when this
- * instruction was executed. This, if the operation succeeded the
- * return value was identical to the \a old parameter, and if not
- * it returns the updated value in memory so you can repeat your
- * operations on it.
- *
+ * \return True (1) if the swap occurred, i.e. if the pointer in \a a was
+ *         equal to \a old_val; false (0) if the swap did not occur because
+ *         the pointer was not equal to \a old_val.
+ *
 * \note The exchange occurred if the return value is true (1).
*/
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *old_val,
- void *new_val)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *old_val,
+ void *new_val)
{
- void *t;
+ int t=0;
tMPI_Thread_mutex_lock(&tMPI_Atomic_mutex);
- t=old_val;
if (a->value == old_val)
{
a->value = new_val;
+ t=1;
}
tMPI_Thread_mutex_unlock(&tMPI_Atomic_mutex);
return t;
do
{
oldval=(int)(a->value);
- } while(tMPI_Atomic_cas(a, oldval, b) != oldval);
+ } while(!tMPI_Atomic_cas(a, oldval, b));
return oldval;
}
/** Atomic swap pointer operation.
do
{
oldval=(void*)(a->value);
- } while(tMPI_Atomic_ptr_cas(a, oldval, b) != oldval);
+ } while(!tMPI_Atomic_ptr_cas(a, oldval, b));
return oldval;
}
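
As a usage sketch for the pointer swap above (publish_buffer and its
arguments are hypothetical; tMPI_Atomic_ptr_swap is assumed to have the
signature suggested by its body):

/* Publish a freshly filled buffer and obtain the previous one in a
   single atomic step; the caller can then recycle the old buffer. */
static void *publish_buffer(tMPI_Atomic_ptr_t *current, void *new_buf)
{
    return tMPI_Atomic_ptr_swap(current, new_buf);
}
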
pkgincludethread_mpiatomicdir = ${pkgincludedir}/thread_mpi/atomic
pkgincludethread_mpiatomic_HEADERS = \
- gcc.h gcc_ppc.h hpux.h xlc_aix.h \
+ gcc.h gcc_ppc.h hpux.h \
gcc_ia64.h gcc_spinlock.h msvc.h xlc_ppc.h \
gcc_intrinsics.h gcc_x86.h
#define tMPI_Atomic_memory_barrier() __sync_synchronize()
/* ia64 cmpxchg */
#define tMPI_Atomic_cas(a, oldval, newval) \
- _InterlockedCompareExchange(&((a)->value),newval,oldval)
+ (_InterlockedCompareExchange(&((a)->value),newval,oldval) == oldval)
/* ia64 pointer cmpxchg */
#define tMPI_Atomic_ptr_cas(a, oldval, newval) \
- _InterlockedCompareExchangePointer(&((a)->value),newval,oldval)
+ (_InterlockedCompareExchangePointer(&((a)->value),newval,oldval)==oldval)
/*#define tMPI_Atomic_ptr_cas(a, oldval, newval) __sync_val_compare_and_swap(&((a)->value),newval,oldval)*/
asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv":
"=r"(res) : "r"(&a->value), "r"(newval) : "memory");
- return res;
+ return res==oldval;
#else
- return __sync_val_compare_and_swap( &(a->value), oldval, newval);
+ return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
#endif
}
/* ia64 ptr cmpxchg */
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval,
- void *newval)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval,
+ void *newval)
{
#if GCC_VERSION < 40200
void* volatile* res;
asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv":
"=r"(res) : "r"(&a->value), "r"(newval) : "memory");
- return (void*)res;
+ return ((void*)res)==oldval;
#else
- return (void*)__sync_val_compare_and_swap( &(a->value), oldval, newval);
+ return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
#endif
}
oldval = tMPI_Atomic_get(a);
newval = oldval + i;
}
- while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+ while(!tMPI_Atomic_cas(a,oldval,newval));
}
return (int)newval;
}
oldval = tMPI_Atomic_get(a);
newval = oldval + i;
}
- while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+ while(!tMPI_Atomic_cas(a,oldval,newval));
}
return (int)oldval;
}
static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
{
tMPI_Atomic_t *a = (tMPI_Atomic_t *) x;
- unsigned long value;
- value = tMPI_Atomic_cas(a, 0, 1);
- if (value)
+ int succeeded;
+ succeeded = tMPI_Atomic_cas(a, 0, 1);
+ if (!succeeded)
{
do
{
{
tMPI_Atomic_memory_barrier();
}
- value = tMPI_Atomic_cas(a, 0, 1);
+ succeeded = tMPI_Atomic_cas(a, 0, 1);
}
- while (value);
+ while (!succeeded);
}
}
static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
{
- return (tMPI_Atomic_cas( ((tMPI_Atomic_t *)x), 0, 1) != 0);
+    return !(tMPI_Atomic_cas( ((tMPI_Atomic_t *)x), 0, 1)); /* 0 if we got the lock */
}
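
A usage sketch for trylock, assuming the 0-on-success convention that the
other tMPI_Spinlock_trylock implementations in these headers follow
(try_update and shared_counter are hypothetical):

static int try_update(tMPI_Spinlock_t *lock, int *shared_counter)
{
    if (tMPI_Spinlock_trylock(lock) == 0)
    {
        (*shared_counter)++;           /* we own the lock */
        tMPI_Spinlock_unlock(lock);
        return 1;                      /* updated */
    }
    return 0;                          /* lock busy: caller retries later */
}
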
static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
{
- return __sync_val_compare_and_swap( &(a->value), oldval, newval);
+ return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
}
-static inline volatile void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t* a,
- void *oldval,
- void *newval)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t* a, void *oldval,
+ void *newval)
{
#if !defined(__INTEL_COMPILER)
- return __sync_val_compare_and_swap( &(a->value), oldval, newval);
+ return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
#else
/* the intel compilers need integer type arguments for compare_and_swap.
on the platforms supported by icc, size_t is always the size of
a pointer. */
- return (volatile void*)__sync_val_compare_and_swap( (size_t*)&(a->value),
- (size_t)oldval,
- (size_t)newval);
+ return (__sync_bool_compare_and_swap( (size_t*)&(a->value),
+ (size_t)oldval,
+ (size_t)newval) );
#endif
}
*/
+/* This file is no longer used; the gcc intrinsics take care of these
+   operations. */
typedef struct tMPI_Atomic
{
"m" (a->value)
: "cc", "memory");
- return prev;
+ return prev==oldval;
}
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void *oldval,
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void *oldval,
void *newval)
{
void *prev;
"m" (a->value)
: "cc", "memory");
#endif
- return prev;
+ return prev==oldval;
}
static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
: "q"(newval), "m"(a->value), "0"(oldval)
: "memory");
- return prev;
+ return prev==oldval;
}
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a,
- void *oldval,
- void *newval)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a,
+ void *oldval,
+ void *newval)
{
void* prev;
#ifndef __x86_64__
: "q"(newval), "m"(a->value), "0"(oldval)
: "memory");
#endif
- return prev;
+ return prev==oldval;
}
#endif /* end of check for gcc intrinsics */
ret = _Asm_cmpxchg((_Asm_sz)SZ_W,(_Asm_sem)_SEM_ACQ,(Uint32*)a,
(Uint32)newval,(_Asm_ldhint)_LDHINT_NONE);
- return ret;
+ return ret==oldval;
}
ret = _Asm_cmpxchg((_Asm_sz)SZ_W,(_Asm_sem)_SEM_ACQ,(Uint64)a,
(Uint64)newval,(_Asm_ldhint)_LDHINT_NONE);
- return ret;
+ return ret==oldval;
}
oldval = tMPI_Atomic_get(a);
newval = oldval + i;
}
- while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+ while(!tMPI_Atomic_cas(a,oldval,newval));
}
return newval;
}
oldval = tMPI_Atomic_get(a);
newval = oldval + i;
}
- while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+ while(!tMPI_Atomic_cas(a,oldval,newval));
}
return oldval;
}
( (i) + InterlockedExchangeAdd((LONG volatile *)(a), (LONG) (i)) )
#define tMPI_Atomic_cas(a, oldval, newval) \
- InterlockedCompareExchange((LONG volatile *)(a), (LONG) (newval), (LONG) (oldval))
+ (InterlockedCompareExchange((LONG volatile *)(a), (LONG) (newval), (LONG) (oldval)) == (LONG)oldval)
#define tMPI_Atomic_ptr_cas(a, oldval, newval) \
- InterlockedCompareExchangePointer(&((a)->value), (PVOID) (newval), \
- (PVOID) (oldval))
+ (InterlockedCompareExchangePointer(&((a)->value), (PVOID) (newval), \
+ (PVOID) (oldval)) == (PVOID)oldval)
#define tMPI_Atomic_swap(a, b) \
InterlockedExchange((LONG volatile *)(a), (LONG) (b))
+++ /dev/null
-/*
-This source code file is part of thread_mpi.
-Written by Sander Pronk, Erik Lindahl, and possibly others.
-
-Copyright (c) 2009, Sander Pronk, Erik Lindahl.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1) Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-2) Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-3) Neither the name of the copyright holders nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-If you want to redistribute modifications, please consider that
-scientific software is very special. Version control is crucial -
-bugs must be traceable. We will be happy to consider code for
-inclusion in the official distribution, but derived work should not
-be called official thread_mpi. Details are found in the README & COPYING
-files.
-*/
-
-/* IBM xlC compiler on AIX */
-#include <sys/atomic_op.h>
-
-
-#define tMPI_Atomic_memory_barrier()
-
-
-typedef struct tMPI_Atomic
-{
- volatile int value; /*!< Volatile, to avoid compiler aliasing */
-}
-tMPI_Atomic_t;
-
-
-typedef struct tMPI_Atomic_ptr
-{
- void* volatile* value; /*!< Volatile, to avoid compiler aliasing */
-}
-tMPI_Atomic_ptr_t;
-
-
-typedef struct tMPI_Spinlock
-{
- volatile unsigned int lock; /*!< Volatile, to avoid compiler aliasing */
-}
-tMPI_Spinlock_t;
-
-
-static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
-{
- int t;
-
- if(__check_lock((atomic_p)&a->value, oldval, newval))
- {
- /* Not successful - value had changed in memory. Reload value. */
- t = a->value;
- }
- else
- {
- /* replacement suceeded */
- t = oldval;
- }
- return t;
-}
-
-
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void* oldval,
- void* newval)
-{
- void *t;
-
- if(__check_lock((atomic_p)&a->value, oldval, newval))
- {
- /* Not successful - value had changed in memory. Reload value. */
- t = a->value;
- }
- else
- {
- /* replacement suceeded */
- t = oldval;
- }
- return t;
-}
-
-
-
-
-static inline void tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
-{
- int oldval,newval;
-
- do
- {
- oldval = tMPI_Atomic_get(a);
- newval = oldval + i;
- }
- while(__check_lock((atomic_p)&a->value, oldval, newval));
-
- return newval;
-}
-
-
-
-static inline void tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
-{
- int oldval,newval;
-
- do
- {
- oldval = tMPI_Atomic_get(a);
- newval = oldval + i;
- }
- while(__check_lock((atomic_p)&a->value, oldval, newval));
-
- return oldval;
-}
-
-
-static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
-{
- __clear_lock((atomic_p)x,0);
-}
-
-
-static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
-{
- do
- {
- ;
- }
- while(__check_lock((atomic_p)x, 0, 1));
-}
-
-
-static inline void tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
-{
- /* Return 0 if we got the lock */
- return (__check_lock((atomic_p)x, 0, 1) != 0)
-}
-
-
-static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
-{
- __clear_lock((atomic_p)x,0);
-}
-
-
-static inline void tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
-{
- return (*((atomic_p)x) != 0);
-}
-
-
-static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
-{
- while(spin_islocked(x)) { ; }
-}
-
-
*
* For now, we just disable the inline keyword if we're compiling C code:
*/
+#if 1
#if (!defined(__cplusplus)) && (!defined(inline))
#define inline_defined_in_atomic 1
#define inline
#endif
+#endif
+
+
+/* IBM xlC compiler */
+#ifdef __cplusplus
+#include <builtins.h>
+#endif
-#define tMPI_Atomic_memory_barrier() { __asm__ __volatile__("\t isync\n"\
- : : :"memory" ); }
+#define TMPI_XLC_INTRINSICS
+/* ppc has many memory synchronization instructions */
+/*#define tMPI_Atomic_memory_barrier() __sync();*/
+/*#define tMPI_Atomic_memory_barrier() __isync();*/
+/*#define tMPI_Atomic_memory_barrier() __lwsync();*/
+/* for normal memory, this should be enough: */
+#define tMPI_Atomic_memory_barrier() __eieio()
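
A sketch of the store ordering this barrier provides (the data/ready pair is
hypothetical; tMPI_Atomic_set is the macro defined below; a consumer needs a
matching barrier between reading the flag and reading the data):

static int           data;
static tMPI_Atomic_t ready;

static void producer(void)
{
    data = 42;                     /* write the payload */
    tMPI_Atomic_memory_barrier();  /* __eieio(): keep the two stores in order */
    tMPI_Atomic_set(&ready, 1);    /* publish: visible only after the payload */
}
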
typedef struct tMPI_Atomic
{
- volatile int value; /*!< Volatile, to avoid compiler aliasing */
+ int value __attribute__ ((aligned(64)));
}
tMPI_Atomic_t;
typedef struct tMPI_Atomic_ptr
{
- void* volatile *value; /*!< Volatile, to avoid compiler aliasing */
+ volatile char* volatile* value __attribute__ ((aligned(64))); /*!< Volatile, to avoid compiler aliasing */
}
tMPI_Atomic_ptr_t;
-
typedef struct tMPI_Spinlock
{
- volatile unsigned int lock; /*!< Volatile, to avoid compiler aliasing */
+ int lock __attribute__ ((aligned(64)));
}
tMPI_Spinlock_t;
+#define tMPI_Atomic_get(a) (int)((a)->value)
+#define tMPI_Atomic_set(a,i) (((a)->value) = (i))
+#define tMPI_Atomic_ptr_get(a) ((a)->value)
+#define tMPI_Atomic_ptr_set(a,i) (((a)->value) = (i))
+
#define TMPI_SPINLOCK_INITIALIZER { 0 }
-#define tMPI_Atomic_get(a) (int)((a)->value)
-#define tMPI_Atomic_set(a,i) (((a)->value) = (i))
-#define tMPI_Atomic_ptr_get(a) (void*)((a)->value)
-#define tMPI_Atomic_ptr_set(a,i) (((a)->value) = (void*)(i))
+static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
+{
+#ifdef TMPI_XLC_INTRINSICS
+ int ret;
+
+    __eieio(); /* these memory barriers are necessary */
+ __fence(); /* and this one needs to be here to avoid aliasing issues */
+ ret=(__compare_and_swap(&(a->value), &oldval, newval));
+ __isync();
+ __fence(); /* and this one needs to be here to avoid aliasing issues */
+ return ret;
+#else
+ int prev;
+ __asm__ __volatile__ ("1: lwarx %0,0,%2 \n"
+ "\t cmpw 0,%0,%3 \n"
+ "\t bne 2f \n"
+ "\t stwcx. %4,0,%2 \n"
+ "\t bne- 1b \n"
+ "\t sync \n"
+ "2: \n"
+ : "=&r" (prev), "=m" (a->value)
+ : "r" (&a->value), "r" (oldval), "r" (newval),
+ "m" (a->value));
+
+ return prev==oldval;
+#endif
+}
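
Note that xlC's __compare_and_swap already returns a boolean and, on failure,
copies the value it found in memory back into the location its second
argument points at, so a retry loop needs no separate re-read. A sketch under
that documented behavior (the increment itself is illustrative):

static int xlc_increment(tMPI_Atomic_t *a)
{
    int oldval = a->value;
    /* on failure, oldval is updated to the value found in memory */
    while (!__compare_and_swap(&(a->value), &oldval, oldval + 1))
    {
    }
    return oldval + 1;
}
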
-static int tMPI_Atomic_add_return(tMPI_Atomic_t * a,
- int i)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void* oldval,
+ void* newval)
{
- int t;
+ int ret;
+ volatile char* volatile* oldv=oldval;
+ volatile char* volatile* newv=newval;
+
+    __eieio(); /* these memory barriers are necessary */
+ __fence(); /* and this one needs to be here to avoid aliasing issues */
+#if (!defined (__LP64__) ) && (!defined(__powerpc64__) )
+ ret=__compare_and_swap((int *)&(a->value), (int*)&oldv, (int)newv);
+#else
+ ret=__compare_and_swaplp((long *)&(a->value), (long*)&oldv, (long)newv);
+#endif
+ __isync();
+ __fence();
+
+ return ret;
+}
+
+
+
+
+static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
+{
+#ifdef TMPI_XLC_INTRINSICS
+ int oldval, newval;
+ do
+ {
+        __eieio(); /* these memory barriers are necessary */
+ oldval = tMPI_Atomic_get(a);
+ newval = oldval + i;
+ }
+ /*while(!__compare_and_swap( &(a->value), &oldval, newval));*/
+ while(__check_lock_mp( &(a->value), oldval, newval));
+
+ __isync();
+
+ return newval;
+#else
+ int t;
+
__asm__ __volatile__("1: lwarx %0,0,%2 \n"
"\t add %0,%1,%0 \n"
"\t stwcx. %0,0,%2 \n"
: "=&r" (t)
: "r" (i), "r" (&a->value) );
return t;
+#endif
}
-static int tMPI_Atomic_fetch_add(tMPI_Atomic_t * a,
- int i)
+static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
{
- int t;
+#ifdef TMPI_XLC_INTRINSICS
+ int oldval,newval;
+ do
+ {
+        __eieio(); /* these memory barriers are necessary */
+ oldval = tMPI_Atomic_get(a);
+ newval = oldval + i;
+ }
+ /*while(__check_lock_mp((const int*)&(a->value), oldval, newval));*/
+ while(__check_lock_mp( &(a->value), oldval, newval));
+ /*while(!__compare_and_swap( &(a->value), &oldval, newval));*/
+ __isync();
+
+ return oldval;
+#else
+ int t;
+
__asm__ __volatile__("\t eieio\n"
- "1: lwarx %0,0,%2 \n"
+ "1: lwarx %0,0,%2 \n"
"\t add %0,%1,%0 \n"
"\t stwcx. %0,0,%2 \n"
"\t bne- 1b \n"
"\t isync \n"
: "=&r" (t)
: "r" (i), "r" (&a->value));
-
- return (t - i);
-}
-
-
-static int tMPI_Atomic_cas(tMPI_Atomic_t * a,
- int oldval,
- int newval)
-{
- int prev;
-
- __asm__ __volatile__ ("1: lwarx %0,0,%2 \n"
- "\t cmpw 0,%0,%3 \n"
- "\t bne 2f \n"
- "\t stwcx. %4,0,%2 \n"
- "\t bne- 1b \n"
- "\t sync \n"
- "2: \n"
- : "=&r" (prev), "=m" (a->value)
- : "r" (&a->value), "r" (oldval), "r" (newval),
- "m" (a->value));
-
- return prev;
-}
-
-static void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a,
- void* oldval,
- void* newval)
-{
- void* prev;
-
-#if (!defined(__PPC64__)) && (!defined(__ppc64))
- __asm__ __volatile__ ("1: lwarx %0,0,%2 \n"
- "\t cmpw 0,%0,%3 \n"
- "\t bne 2f \n"
- "\t stwcx. %4,0,%2 \n"
- "\t bne- 1b \n"
- "\t sync \n"
- "2: \n"
- : "=&r" (prev), "=m" (a->value)
- : "r" (&a->value), "r" (oldval), "r" (newval),
- "m" (a->value));
-
-#else
- __asm__ __volatile__ ("1: ldarx %0,0,%2 \n"
- "\t cmpd 0,%0,%3 \n"
- "\t bne 2f \n"
- "\t stdcx. %4,0,%2 \n"
- "\t bne- 1b \n"
- "\t sync \n"
- "2: \n"
- : "=&r" (prev), "=m" (a->value)
- : "r" (&a->value), "r" (oldval), "r" (newval),
- "m" (a->value));
+ return (t - i);
#endif
- return prev;
}
-static void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
+static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
{
- x->lock = 0;
+ __clear_lock_mp((const int*)x,0);
}
-
-static void tMPI_Spinlock_lock(tMPI_Spinlock_t * x)
+static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
{
- unsigned int tmp;
-
- __asm__ __volatile__("\t b 1f \n"
- "2: lwzx %0,0,%1 \n"
- "\t cmpwi 0,%0,0 \n"
- "\t bne+ 2b \n"
- "1: lwarx %0,0,%1 \n"
- "\t cmpwi 0,%0,0 \n"
- "\t bne- 2b \n"
- "\t stwcx. %2,0,%1 \n"
- "\t bne- 2b \n"
- "\t isync\n"
- : "=&r"(tmp)
- : "r"(&x->lock), "r"(1));
+ do
+ {
+ tMPI_Atomic_memory_barrier();
+ }
+ while(__check_lock_mp(&(x->lock), 0, 1));
}
-static int tMPI_Spinlock_trylock(tMPI_Spinlock_t * x)
+static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
{
- unsigned int old, t;
- unsigned int mask = 1;
- volatile unsigned int *p = &x->lock;
-
- __asm__ __volatile__("\t eieio\n"
- "1: lwarx %0,0,%4 \n"
- "\t or %1,%0,%3 \n"
- "\t stwcx. %1,0,%4 \n"
- "\t bne 1b \n"
- "\t sync \n"
- : "=&r" (old), "=&r" (t), "=m" (*p)
- : "r" (mask), "r" (p), "m" (*p));
-
- return ((old & mask) != 0);
+ /* Return 0 if we got the lock */
+ return (__check_lock_mp(&(x->lock), 0, 1) != 0);
}
-static void tMPI_Spinlock_unlock(tMPI_Spinlock_t * x)
+static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
{
- __asm__ __volatile__("\t eieio \n");
- x->lock = 0;
+ __clear_lock_mp(&(x->lock),0);
}
-static int tMPI_Spinlock_islocked(const tMPI_Spinlock_t * x)
+static inline int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
{
- return ( x->lock != 0);
+ tMPI_Atomic_memory_barrier();
+ return ((x->lock) != 0);
}
-static void tMPI_Spinlock_wait(tMPI_Spinlock_t * x)
+static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
{
-
- do
+ do
{
tMPI_Atomic_memory_barrier();
}
}
-
-
g_rotmat.1 \
g_saltbr.1 \
g_sas.1 \
- g_sdf.1 \
g_select.1 \
g_sgangle.1 \
g_sham.1 \
tMPI_Atomic_set(&(met->n_remaining), 0);
met->buf=(void**)tMPI_Malloc(sizeof(void*)*N);
met->bufsize=(size_t*)tMPI_Malloc(sizeof(size_t)*N);
- met->read_data=(gmx_bool*)tMPI_Malloc(sizeof(gmx_bool)*N);
+ met->read_data=(tmpi_bool*)tMPI_Malloc(sizeof(tmpi_bool)*N);
#ifdef USE_COLLECTIVE_COPY_BUFFER
met->cpbuf=(tMPI_Atomic_ptr_t*)tMPI_Malloc(sizeof(tMPI_Atomic_ptr_t)*N);
met->cb=NULL;
{
void *srcbuf;
#ifdef USE_COLLECTIVE_COPY_BUFFER
- gmx_bool decrease_ctr=FALSE;
+ tmpi_bool decrease_ctr=FALSE;
#endif
if ( sendsize > recvsize )
int i;
#ifdef USE_COLLECTIVE_COPY_BUFFER
/* decide based on the number of waiting threads */
- gmx_bool using_cb=(bufsize < (size_t)(n_remaining*COPY_BUFFER_SIZE));
+ tmpi_bool using_cb=(bufsize < (size_t)(n_remaining*COPY_BUFFER_SIZE));
cev->met[myrank].using_cb=using_cb;
if (using_cb)
be double-buffering) so we always spin here. */
tMPI_Atomic_memory_barrier();
#if 0
- while (tMPI_Atomic_cas( &(cev->met[rank].buf_readcount), 0,
- -100000) != 0)
+ while (!tMPI_Atomic_cas( &(cev->met[rank].buf_readcount), 0,
+ -100000))
#endif
#if 1
while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0)
{
if (comm1->grp.peers[i] != comm2->grp.peers[i])
{
- gmx_bool found=FALSE;
+ tmpi_bool found=FALSE;
*result=TMPI_SIMILAR;
for(j=0;j<comm2->grp.N;j++)
/* initialize the main barrier */
tMPI_Barrier_init(&(ret->barrier), N);
-#if 0
- {
- /* calculate the number of reduce barriers */
- int Nbarriers=0;
- int Nred=N;
- while(Nred>1) {
- Nbarriers+=1;
- Nred = Nred/2 + Nred%2;
- }
-
- ret->Nreduce_barriers=Nbarriers;
- ret->reduce_barrier=(tMPI_Barrier_t*)
- tMPI_Malloc(sizeof(tMPI_Barrier_t)*(Nbarriers+1));
- ret->N_reduce_barrier=(int*)tMPI_Malloc(sizeof(int)*(Nbarriers+1));
- Nred=N;
- for(i=0;i<Nbarriers;i++)
- {
- tMPI_Barrier_init( &(ret->reduce_barrier[i]), Nred);
- ret->N_reduce_barrier[i]=Nred;
- /* Nred is now Nred/2 + a rest term because solitary
- process at the end of the list must still be accounter for */
- Nred = Nred/2 + Nred%2;
- }
- }
-#endif
-
/* the reduce barriers */
{
/* First calculate the number of reduce barriers */
int *group)
{
int i,j;
- gmx_bool found;
+ tmpi_bool found;
/* reset groups */
for(i=0;i<N;i++)
the threads actually supplies
these arrays to the comm
structure) */
- gmx_bool i_am_first=FALSE;
+ tmpi_bool i_am_first=FALSE;
int myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());
struct tmpi_split *spl;
/* Group query & manipulation functions */
-gmx_bool tMPI_In_group(tMPI_Group group)
+tmpi_bool tMPI_In_group(tMPI_Group group)
{
int i;
struct tmpi_thread *cur;
#endif
#ifdef HAVE_SYS_TIME_H
-#include <unistd.h>
+#include <sys/time.h>
#endif
#include <errno.h>
**************************************************************************/
-
-#ifndef __cplusplus
-typedef int gmx_bool;
+typedef int tmpi_bool;
#define TRUE 1
#define FALSE 0
-#else
-#ifndef TRUE
-#define TRUE true
-#endif
-#ifndef FALSE
-#define FALSE false
-#endif
-#endif
-
-
size_t bufsize; /* the size of the data to be transmitted */
tMPI_Datatype datatype; /* the data type */
- gmx_bool nonblock; /* whether the receiver is non-blocking */
+ tmpi_bool nonblock; /* whether the receiver is non-blocking */
/* state, values from enum_envelope_state .
(there's a few busy-waits relying on this flag).
/* prev and next envelopes in the send/recv_envelope_list linked list */
struct envelope *prev,*next;
- gmx_bool send; /* whether this is a send envelope (if TRUE), or a receive
+ tmpi_bool send; /* whether this is a send envelope (if TRUE), or a receive
envelope (if FALSE) */
#ifdef USE_SEND_RECV_COPY_BUFFER
- gmx_bool using_cb; /* whether a copy buffer is (going to be) used */
+ tmpi_bool using_cb; /* whether a copy buffer is (going to be) used */
void* cb;/* the allocated copy buffer pointer */
#endif
/* the next and previous envelopes in the request list */
/* the request object for asynchronous operations. */
struct tmpi_req_
{
- gmx_bool finished; /* whether it's finished */
+ tmpi_bool finished; /* whether it's finished */
struct envelope *ev; /* the envelope */
struct tmpi_thread *source; /* the message source (for receives) */
int tag; /* the tag */
int error; /* error code */
size_t transferred; /* the number of transferred bytes */
- gmx_bool cancelled; /* whether the transmission was canceled */
+ tmpi_bool cancelled; /* whether the transmission was canceled */
struct tmpi_req_ *next,*prev; /* next,prev request in linked list,
used in the req_list, but also in
size_t *bufsize; /* array of number of bytes to send/recv */
#ifdef USE_COLLECTIVE_COPY_BUFFER
- gmx_bool using_cb; /* whether a copy buffer is (going to be) used */
+ tmpi_bool using_cb; /* whether a copy buffer is (going to be) used */
tMPI_Atomic_t buf_readcount; /* Number of threads reading from buf
while using_cpbuf is true, but cpbuf
is still NULL. */
and the coll_env_thread is ready for re-use. */
tMPI_Event recv_ev; /* event associated with being a receiving thread. */
- gmx_bool *read_data; /* whether we read data from a specific thread. */
+ tmpi_bool *read_data; /* whether we read data from a specific thread. */
};
/* Collective communications once sync. These run in parallel with
{
volatile int Ncol_init;
volatile int Ncol_destroy;
- volatile gmx_bool can_finish;
+ volatile tmpi_bool can_finish;
volatile int *colors;
volatile int *keys;
};
tMPI_Op_fn *op_functions; /* array of op functions for this datatype */
int N_comp; /* number of components */
struct tmpi_datatype_component *comps; /* the components */
- gmx_bool committed; /* whether the data type is committed */
+ tmpi_bool committed; /* whether the data type is committed */
};
/* just as a shorthand: */
typedef struct tmpi_datatype_ tmpi_dt;
/* check whether we're the main thread */
-gmx_bool tMPI_Is_master(void);
+tmpi_bool tMPI_Is_master(void);
/* check whether the current process is in a group */
-gmx_bool tMPI_In_group(tMPI_Group group);
+tmpi_bool tMPI_In_group(tMPI_Group group);
/* find the rank of a thread in a comm */
int tMPI_Comm_seek_rank(tMPI_Comm comm, struct tmpi_thread *th);
#include "config.h"
#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
#include "thread_mpi/list.h"
head=(tMPI_Stack_element*)tMPI_Atomic_ptr_get( &(st->head) );
el->next=head;
}
- while (tMPI_Atomic_ptr_cas(&(st->head), head, el)!=(void*)head);
+ while (!tMPI_Atomic_ptr_cas(&(st->head), head, el));
}
tMPI_Stack_element *tMPI_Stack_pop(tMPI_Stack *st)
next=head->next;
else
next=NULL;
- } while (tMPI_Atomic_ptr_cas(&(st->head), head, next)!=(void*)head);
+ } while (!tMPI_Atomic_ptr_cas(&(st->head), head, next));
return head;
}
do
{
head=(tMPI_Stack_element*)tMPI_Atomic_ptr_get( &(st->head) );
- } while (tMPI_Atomic_ptr_cas(&(st->head), head, NULL)!=(void*)head);
+ } while (!tMPI_Atomic_ptr_cas(&(st->head), head, NULL));
return head;
}
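
A usage sketch for this lock-free stack (work_item and the helpers are
hypothetical; tMPI_Stack, tMPI_Stack_element, tMPI_Stack_push and
tMPI_Stack_pop are assumed to have the shapes suggested by the bodies above):

struct work_item
{
    tMPI_Stack_element el;      /* intrusive link; first member, so an
                                   element pointer doubles as the item */
    int                payload;
};

static void put_work(tMPI_Stack *st, struct work_item *w)
{
    tMPI_Stack_push(st, &(w->el));               /* lock-free push */
}

static struct work_item *get_work(tMPI_Stack *st)
{
    tMPI_Stack_element *e = tMPI_Stack_pop(st);  /* NULL when empty */
    return (struct work_item *)e;
}
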
do
{
- } while (tMPI_Atomic_ptr_cas(&(q->head), head,
+ } while (!tMPI_Atomic_ptr_cas(&(q->head), head, next));
}
#endif
if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number.
If it is a later number, we can't
have been the first to arrive here. */
- tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs)==syncs)
+ tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
{
/* we're the first! */
function(param);
Calculating the difference instead
of comparing directly avoids ABA
problems. */
- tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs)==syncs)
+ tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
{
/* we're the first! */
ret=function(param);
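
The difference-based test above deserves a note: with a free-running sync
counter, comparing through the (wrapping) difference stays correct across
integer wraparound, where a direct comparison fails. A self-contained
illustration (the values are arbitrary):

#include <assert.h>

int main(void)
{
    unsigned older_u = 0x7fffffffu;          /* just before wraparound */
    unsigned newer_u = older_u + 2u;         /* wraps past INT_MAX */
    int      older   = (int)older_u;
    int      newer   = (int)newer_u;         /* negative after the wrap */

    assert(!(newer > older));                /* direct comparison: wrong */
    assert((int)(newer_u - older_u) > 0);    /* difference test: right */
    return 0;
}
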
/* check for the completion of a single request */
-static gmx_bool tMPI_Test_single(struct tmpi_thread *cur, struct tmpi_req_ *rq);
+static tmpi_bool tMPI_Test_single(struct tmpi_thread *cur,
+ struct tmpi_req_ *rq);
/* check and wait for the completion of a single request */
static void tMPI_Wait_single(struct tmpi_thread *cur, struct tmpi_req_ *rq);
/* check for the completion of a NULL-delimited doubly linked list of
requests */
-static gmx_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
- gmx_bool *any_done);
+static tmpi_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
+ tmpi_bool *any_done);
/* we detach by swapping what we expect the pointer value to be,
with NULL. If there were a cross-platform way to atomically
swap without checking, we could do that, too. */
- while(tMPI_Atomic_ptr_cas( &(evl->head_rts), ret, NULL ) !=
- (void*)ret)
+ while(!tMPI_Atomic_ptr_cas( &(evl->head_rts), ret, NULL ))
{
ret=(struct envelope*)tMPI_Atomic_ptr_get(&(evl->head_rts));
}
/* the cmpxchg operation is a memory fence, so we shouldn't need
to worry about out-of-order evaluation */
}
- while (tMPI_Atomic_ptr_cas( &(evl->head_rts), sevn, sev ) != (void*)sevn);
+ while (!tMPI_Atomic_ptr_cas( &(evl->head_rts), sevn, sev ));
#else
tMPI_Spinlock_lock( &(evl->lock_rts) );
ev->next=(struct envelope*)evl->head_rts;
{
#ifdef TMPI_LOCK_FREE_LISTS
struct envelope *evl_head_new_orig;
- struct envelope *evl_cas;
#endif
sev->prev=NULL;
sev->next=evl_head_new_orig;
/* do the compare-and-swap.
this operation is a memory fence, so we shouldn't need
- to worry about out-of-order stores */
- evl_cas=(struct envelope*)tMPI_Atomic_ptr_cas(&(evl->head_new),
- evl_head_new_orig, sev);
- /* and compare the results: if they aren't the same,
+ to worry about out-of-order stores. If it returns false,
somebody else got there before us: */
- } while (evl_cas != evl_head_new_orig);
+ } while (!tMPI_Atomic_ptr_cas(&(evl->head_new), evl_head_new_orig, sev));
+
#else
tMPI_Spinlock_lock( &(evl->lock_new) );
/* we add to the start of the list */
}
-static gmx_bool tMPI_Envelope_matches(const struct envelope *sev,
- const struct envelope *rev)
+static tmpi_bool tMPI_Envelope_matches(const struct envelope *sev,
+ const struct envelope *rev)
{
#ifdef TMPI_DEBUG
printf("%5d: tMPI_Envelope_matches (%d->%d)==(%d->%d), tag=(%d==%d), \n datatype=(%ld==%ld), comm=(%ld,%ld),\n finished=(%d==%d)\n",
/* first copy */
memcpy(sev->cb, sev->buf, sev->bufsize);
/* now set state, if other side hasn't started copying yet. */
- if (tMPI_Atomic_cas( &(sev->state), env_unmatched, env_cb_available)
- == env_unmatched)
+ if (tMPI_Atomic_cas( &(sev->state), env_unmatched, env_cb_available))
{
/* if it was originally unmatched, the receiver wasn't
copying the old buffer. We don't need to wait,
static struct envelope* tMPI_Prep_send_envelope(struct send_envelope_list *evl,
tMPI_Comm comm, struct tmpi_thread *src,
struct tmpi_thread *dest, void *buf, int count,
- tMPI_Datatype datatype, int tag, gmx_bool nonblock)
+ tMPI_Datatype datatype, int tag, tmpi_bool nonblock)
{
/* get an envelope from the send-envelope stack */
struct envelope *ev=tMPI_Send_env_list_fetch_new( evl );
static struct envelope* tMPI_Prep_recv_envelope(struct tmpi_thread *cur,
tMPI_Comm comm, struct tmpi_thread *src,
struct tmpi_thread *dest, void *buf, int count,
- tMPI_Datatype datatype, int tag, gmx_bool nonblock)
+ tMPI_Datatype datatype, int tag, tmpi_bool nonblock)
{
/* get an envelope from the stack */
struct envelope *ev=tMPI_Free_env_list_fetch_recv( &(cur->envelopes) );
#ifdef USE_SEND_RECV_COPY_BUFFER
/* we remove the sender's envelope only if we do the transfer, which
we always do if the buffer size = 0 */
- gmx_bool remove_sender = (sev->bufsize==0);
+ tmpi_bool remove_sender = (sev->bufsize==0);
#endif
#ifdef TMPI_DEBUG
printf("%5d: tMPI_Xfer (%d->%d, tag=%d) started\n",
if (sev->using_cb)
{
/* check if the other side has already finished copying */
- if (tMPI_Atomic_cas( &(sev->state), env_unmatched, env_copying)
- != env_unmatched)
+ if (!tMPI_Atomic_cas( &(sev->state), env_unmatched, env_copying))
{
/* it has, and we're copying from the new buffer.
We're now also tasked with removing the envelope */
struct tmpi_thread *src,
void *recv_buf, int recv_count,
tMPI_Datatype datatype,
- int tag, gmx_bool nonblock)
+ int tag, tmpi_bool nonblock)
{
struct tmpi_thread *dest=cur;
struct envelope *rev;
struct tmpi_thread *dest,
void *send_buf, int send_count,
tMPI_Datatype datatype, int tag,
- gmx_bool nonblock)
+ tmpi_bool nonblock)
{
struct tmpi_thread *src=cur;
struct envelope *sev;
#ifdef TMPI_LOCK_FREE_LISTS
/* Behold our lock-free shared linked list:
(see tMPI_Send_env_list_add_new for more info) */
- struct envelope *evl_cas;
-
do
{
/* read old head atomically */
sev_head=(struct envelope*)
tMPI_Atomic_ptr_get( &(cur->evs[i].head_new) );
/* do the compare-and-swap to detach the list */
- evl_cas=(struct envelope*)
- tMPI_Atomic_ptr_cas(&(cur->evs[i].head_new), sev_head,
- NULL);
- } while (evl_cas != sev_head);
+ } while (!tMPI_Atomic_ptr_cas(&(cur->evs[i].head_new), sev_head,
+ NULL));
#else
tMPI_Spinlock_lock( &(cur->evs[i].lock_new) );
sev_head=(struct send_envelope*)cur->evs[i].head_new;
tMPI_Event_process( &(cur->p2p_event), n_handled);
}
-static gmx_bool tMPI_Test_single(struct tmpi_thread *cur, struct tmpi_req_ *rq)
+static tmpi_bool tMPI_Test_single(struct tmpi_thread *cur, struct tmpi_req_ *rq)
{
struct envelope *ev=rq->ev;
} while(TRUE);
}
-static gmx_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
- gmx_bool *any_done)
+static tmpi_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
+ tmpi_bool *any_done)
{
- gmx_bool all_done=TRUE;
+ tmpi_bool all_done=TRUE;
struct tmpi_req_ *creq=rqs;
int i=0;
while(creq)
{
- gmx_bool finished=tMPI_Test_single(cur, creq);
+ tmpi_bool finished=tMPI_Test_single(cur, creq);
i++;
/* now do the check */
blocking = whether to block until all reqs are completed */
static void tMPI_Test_multi_req(struct tmpi_thread *cur,
int count, tMPI_Request *array_of_requests,
- gmx_bool wait, gmx_bool blocking)
+ tmpi_bool wait, tmpi_bool blocking)
{
int i;
struct tmpi_req_ *first=NULL, *last=NULL;
/* for the first iteration, the inputs are in the
sendbuf*/
a=sendbuf;
- b=tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[nbr]));
+ b=(void*)tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[nbr]));
}
else
{
/* after the first operation, they're already in
the recvbuf */
a=recvbuf;
- b=tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[nbr]));
+ b=(void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[nbr]));
}
/* here we check for overlapping buffers */
if (a==b)
tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);
#endif
/* distribute rootbuf */
- rootbuf=tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[0]));
+ rootbuf=(void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[0]));
/* and now we just copy things back. We know that the root thread
arrives last, so there's no point in using tMPI_Scatter with
size_t sendsize=sendtype->size*sendcount;
size_t total_send_size=0;
#ifdef USE_COLLECTIVE_COPY_BUFFER
- gmx_bool using_cb;
+ tmpi_bool using_cb;
#endif
if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
int i;
size_t total_send_size=0;
#ifdef USE_COLLECTIVE_COPY_BUFFER
- gmx_bool using_cb;
+ tmpi_bool using_cb;
#endif
if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
/* whether MPI has finalized (we need this to distinguish pre-inited from
post-finalized states */
-static gmx_bool tmpi_finalized=FALSE;
+static tmpi_bool tmpi_finalized=FALSE;
/* misc. global information about MPI */
struct tmpi_global *tmpi_global=NULL;
/* start N threads with argc, argv (used by tMPI_Init)*/
-void tMPI_Start_threads(gmx_bool main_returns, int N, int *argc, char ***argv,
+void tMPI_Start_threads(tmpi_bool main_returns, int N, int *argc, char ***argv,
void (*start_fn)(void*), void *start_arg,
int (*start_fn_main)(int, char**));
}
#endif
-gmx_bool tMPI_Is_master(void)
+tmpi_bool tMPI_Is_master(void)
{
/* if there are no other threads, we're the main thread */
if ( (!TMPI_COMM_WORLD) || TMPI_COMM_WORLD->grp.N==0)
/* otherwise we know this through thread specific data: */
/* whether the thread pointer points to the head of the threads array */
- return (gmx_bool)(tMPI_Get_current() == threads);
+ return (tmpi_bool)(tMPI_Get_current() == threads);
}
tMPI_Comm tMPI_Get_comm_self(void)
}
-void tMPI_Start_threads(gmx_bool main_returns, int N, int *argc, char ***argv,
+void tMPI_Start_threads(tmpi_bool main_returns, int N, int *argc, char ***argv,
void (*start_fn)(void*), void *start_arg,
int (*start_fn_main)(int, char**))
{
struct tmpi_datatype_ *lt=tmpi_global->usertypes[i];
if (lt->committed && lt->N_comp==dt->N_comp)
{
- gmx_bool found=TRUE;
+ tmpi_bool found=TRUE;
for(j=0;j<lt->N_comp;j++)
{
if ( (lt->comps[j].type != dt->comps[j].type) ||
}
if (dt != *datatype)
{
- gmx_bool found=FALSE;
+ tmpi_bool found=FALSE;
/* we remove the old one from the list */
for(i=0;i<tmpi_global->N_usertypes;i++)
{