Merge branch 'release-4-5-patches' of git://git.gromacs.org/gromacs into release...
authorSzilard Pall <pszilard@cbr.su.se>
Tue, 31 Aug 2010 15:25:37 +0000 (17:25 +0200)
committerSzilard Pall <pszilard@cbr.su.se>
Tue, 31 Aug 2010 15:25:37 +0000 (17:25 +0200)
24 files changed:
include/thread_mpi/atomic.h
include/thread_mpi/atomic/Makefile.am
include/thread_mpi/atomic/gcc_ia64.h
include/thread_mpi/atomic/gcc_intrinsics.h
include/thread_mpi/atomic/gcc_ppc.h
include/thread_mpi/atomic/gcc_x86.h
include/thread_mpi/atomic/hpux.h
include/thread_mpi/atomic/msvc.h
include/thread_mpi/atomic/xlc_aix.h [deleted file]
include/thread_mpi/atomic/xlc_ppc.h
man/man1/Makefile.am
src/gmxlib/thread_mpi/collective.c
src/gmxlib/thread_mpi/comm.c
src/gmxlib/thread_mpi/group.c
src/gmxlib/thread_mpi/impl.h
src/gmxlib/thread_mpi/list.c
src/gmxlib/thread_mpi/once.c
src/gmxlib/thread_mpi/p2p.c
src/gmxlib/thread_mpi/p2p_protocol.h
src/gmxlib/thread_mpi/p2p_wait.h
src/gmxlib/thread_mpi/reduce.h
src/gmxlib/thread_mpi/scatter.h
src/gmxlib/thread_mpi/tmpi_init.c
src/gmxlib/thread_mpi/type.c

index da7a3967120df80dd27675e33a766f8183e16f2f..98d9462eb2b48c22adcf8ac181616176d4a5b686 100644 (file)
@@ -138,7 +138,7 @@ extern "C"
 
 #elif defined(__xlC__) && defined (_AIX)
 /* IBM xlC compiler on AIX */
-#include "atomic/xlc_aix.h"
+#include "atomic/xlc_ppc.h"
 
 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
 /* HP compiler on ia64 */
@@ -423,9 +423,14 @@ static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
  *   The \a old value is compared with the memory value in the atomic datatype.
 *   If they are identical, the atomic type is swapped with the new value, 
  *   and otherwise left unchanged. 
+ * 
+ *   This is *the* synchronization primitive: it has a consensus number of
+ *   infinity, and is available in some form on all modern CPU architectures.
+ *   In the words of Herlihy&Shavit (The art of multiprocessor programming),
+ *   it is the 'king of all wild things'. 
  *  
- *   This is a very useful synchronization primitive: You can start by reading
- *   a value (without locking anything), perform some calculations, and then
+ *   In practice, use it as follows: You can start by reading a value 
+ *   (without locking anything), perform some calculations, and then
  *   atomically try to update it in memory unless it has changed. If it has
  *   changed you will get an error return code - reread the new value
 *   and repeat the calculations in that case.
@@ -435,23 +440,21 @@ static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
  *   \param new_val  New value to write to the atomic type if it currently is
  *                   identical to the old value.
  *
- *   \return The value of the atomic memory variable in memory when this 
- *           instruction was executed. This, if the operation succeeded the
- *           return value was identical to the \a old parameter, and if not
- *           it returns the updated value in memory so you can repeat your
- *           operations on it. 
- *
+ *   \return    True (1) if the swap occurred: i.e. if the value in a was equal
+ *              to old_val. False (0) if the swap didn't occur and the value
+ *              was not equal to old_val.
+ * 
 *   \note   The exchange occurred if the return value is true (1).
  */
 static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int old_val, int new_val)
 {
-    int t;
+    int t=0;
     
     tMPI_Thread_mutex_lock(&tMPI_Atomic_mutex);
-    t=old_val;
     if (a->value == old_val)
     {
         a->value = new_val;
+        t=1;
     }
     tMPI_Thread_mutex_unlock(&tMPI_Atomic_mutex);
     return t;
@@ -467,31 +470,29 @@ static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int old_val, int new_val)
  *   and otherwise left unchanged. 
  *  
  *   This is essential for implementing wait-free lists and other data
- *   structures. 
+ *   structures. See 'tMPI_Atomic_cas()'.
  *
  *   \param a        Atomic datatype ('memory' value)
  *   \param old_val  Pointer value read from the atomic type at an earlier point
  *   \param new_val  New value to write to the atomic type if it currently is
  *                   identical to the old value.
  *
- *   \return The value of the atomic pointer in memory when this 
- *           instruction was executed. This, if the operation succeeded the
- *           return value was identical to the \a old parameter, and if not
- *           it returns the updated value in memory so you can repeat your
- *           operations on it. 
- *
+ *   \return    True (1) if the swap occurred: i.e. if the value in a was equal
+ *              to old_val. False (0) if the swap didn't occur and the value
+ *              was not equal to old_val.
+ * 
 *   \note   The exchange occurred if the return value is true (1).
  */
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *old_val,
-                                        void *new_val)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *old_val,
+                                      void *new_val)
 {
-    void *t;
+    int t=0;
     
     tMPI_Thread_mutex_lock(&tMPI_Atomic_mutex);
-    t=old_val;
     if (a->value == old_val)
     {
         a->value = new_val;
+        t=1;
     }
     tMPI_Thread_mutex_unlock(&tMPI_Atomic_mutex);
     return t;
@@ -622,7 +623,7 @@ static inline int tMPI_Atomic_swap(tMPI_Atomic_t *a, int b)
     do
     {
         oldval=(int)(a->value);
-    } while(tMPI_Atomic_cas(a, oldval, b) != oldval);
+    } while(!tMPI_Atomic_cas(a, oldval, b));
     return oldval;
 }
 /** Atomic swap pointer operation.
@@ -640,7 +641,7 @@ static inline void *tMPI_Atomic_ptr_swap(tMPI_Atomic_ptr_t *a, void *b)
     do
     {
         oldval=(void*)(a->value);
-    } while(tMPI_Atomic_ptr_cas(a, oldval, b) != oldval);
+    } while(!tMPI_Atomic_ptr_cas(a, oldval, b));
     return oldval;
 }
 
index 4167c39045242e51b7eae247da91181dea4a745d..065022a0c734825ca6f4a149c4e78016ba6df58a 100644 (file)
@@ -7,7 +7,7 @@
 pkgincludethread_mpiatomicdir = ${pkgincludedir}/thread_mpi/atomic
 
 pkgincludethread_mpiatomic_HEADERS = \
-       gcc.h            gcc_ppc.h        hpux.h           xlc_aix.h \
+       gcc.h            gcc_ppc.h        hpux.h           \
        gcc_ia64.h       gcc_spinlock.h   msvc.h           xlc_ppc.h \
        gcc_intrinsics.h gcc_x86.h        
 
index 4aec3c1f5e29ec63b4e1f6e56e1ec588a7ec6f3c..76514af53256dac5f805d0bc836a3cd7458a04be 100644 (file)
@@ -116,10 +116,10 @@ unsigned __int64 __fetchadd4_rel(unsigned int *addend, const int increment);
 #define tMPI_Atomic_memory_barrier() __sync_synchronize()
 /* ia64 cmpxchg */
 #define tMPI_Atomic_cas(a, oldval, newval) \
-          _InterlockedCompareExchange(&((a)->value),newval,oldval)
+          (_InterlockedCompareExchange(&((a)->value),newval,oldval) == oldval)
 /* ia64 pointer cmpxchg */
 #define tMPI_Atomic_ptr_cas(a, oldval, newval) \
-          _InterlockedCompareExchangePointer(&((a)->value),newval,oldval)
+          (_InterlockedCompareExchangePointer(&((a)->value),newval,oldval)==oldval)
 
 /*#define tMPI_Atomic_ptr_cas(a, oldval, newval) __sync_val_compare_and_swap(&((a)->value),newval,oldval)*/
 
@@ -145,15 +145,15 @@ static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
     asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv":                    
                   "=r"(res) : "r"(&a->value), "r"(newval) : "memory"); 
                           
-    return res;
+    return res==oldval;
 #else
-    return __sync_val_compare_and_swap( &(a->value), oldval, newval);
+    return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 #endif
 }
 
 /* ia64 ptr cmpxchg */
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval, 
-                                        void *newval)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval, 
+                                      void *newval)
 {
 #if GCC_VERSION < 40200
     void* volatile* res;
@@ -161,9 +161,9 @@ static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t * a, void *oldval,
     asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv":                    
                   "=r"(res) : "r"(&a->value), "r"(newval) : "memory"); 
                           
-    return (void*)res;
+    return ((void*)res)==oldval;
 #else
-    return (void*)__sync_val_compare_and_swap( &(a->value), oldval, newval);
+    return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 #endif
 }
 
@@ -209,7 +209,7 @@ static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
             oldval = tMPI_Atomic_get(a);
             newval = oldval + i;
         }
-        while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+        while(!tMPI_Atomic_cas(a,oldval,newval));
     }
     return (int)newval;
 }
@@ -240,7 +240,7 @@ static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
             oldval = tMPI_Atomic_get(a);
             newval = oldval + i;
         }
-        while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+        while(!tMPI_Atomic_cas(a,oldval,newval));
     }
     return (int)oldval;
 }
@@ -262,9 +262,9 @@ static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
 static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
 {
     tMPI_Atomic_t *a = (tMPI_Atomic_t *) x;
-    unsigned long value;                                                 
-    value = tMPI_Atomic_cas(a, 0, 1);                             
-    if (value)                                                           
+    int succeeded;                                                 
+    succeeded = tMPI_Atomic_cas(a, 0, 1);                             
+    if (!succeeded)                                                           
     {                                                                    
         do                                                               
         {                                                                
@@ -272,16 +272,16 @@ static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
             {                                                            
                 tMPI_Atomic_memory_barrier();                             
             }                                                            
-            value = tMPI_Atomic_cas(a, 0, 1);                       
+            succeeded = tMPI_Atomic_cas(a, 0, 1);                       
         }                                                                
-        while (value);                                                   
+        while (!succeeded);                                                   
     }                                                                    
 } 
 
 
 static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
 {
-    return (tMPI_Atomic_cas( ((tMPI_Atomic_t *)x), 0, 1) != 0);
+    return (tMPI_Atomic_cas( ((tMPI_Atomic_t *)x), 0, 1));
 }
 
 
index 7a0899ceb126e3a43b7fa64b8ea879499ecdcd21..49db56b102505d1d062826e01452d832bc8d61b7 100644 (file)
@@ -54,7 +54,7 @@ static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, volatile int i)
 
 static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
 {
-    return __sync_val_compare_and_swap( &(a->value), oldval, newval);
+    return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 }
 
 
@@ -87,19 +87,18 @@ static inline void* tMPI_Atomic_ptr_swap(tMPI_Atomic_ptr_t *a, void *b)
 
 
 
-static inline volatile void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t* a, 
-                                                     void *oldval, 
-                                                     void *newval)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t* a, void *oldval, 
+                                      void *newval)
 {
 #if !defined(__INTEL_COMPILER)
-    return __sync_val_compare_and_swap( &(a->value), oldval, newval);
+    return __sync_bool_compare_and_swap( &(a->value), oldval, newval);
 #else
     /* the intel compilers need integer type arguments for compare_and_swap.
         on the platforms supported by icc, size_t is always the size of
         a pointer. */
-    return (volatile void*)__sync_val_compare_and_swap( (size_t*)&(a->value), 
-                                                        (size_t)oldval, 
-                                                        (size_t)newval);
+    return (__sync_bool_compare_and_swap( (size_t*)&(a->value), 
+                                          (size_t)oldval, 
+                                          (size_t)newval) );
 #endif
 }
 
index 5e11dff9de3d0b74ab18161bf2078708f6efae52..dd410a0c6fc86d2d1a82cf6194b7493dcd8ece4b 100644 (file)
@@ -43,6 +43,7 @@ files.
  */
 
 
+/* this file is not used any more. gcc intrinsics take care of it */
 
 typedef struct tMPI_Atomic
 {
@@ -142,11 +143,11 @@ static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
                             "m" (a->value)
                           : "cc", "memory");
     
-    return prev;
+    return prev==oldval;
 }
 
 
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void *oldval,
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void *oldval,
                                         void *newval)
 {
     void *prev;
@@ -176,7 +177,7 @@ static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void *oldval,
                             "m" (a->value)
                           : "cc", "memory");
 #endif
-    return prev;
+    return prev==oldval;
 }
 
 static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
index 1dc9ff63276af25419a1458d609f071beb848d3c..ea0e6777bc485da23b1c91d24d4814e2551f5775 100644 (file)
@@ -128,12 +128,12 @@ static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
                          : "q"(newval), "m"(a->value), "0"(oldval)
                          : "memory");
     
-    return prev;
+    return prev==oldval;
 }
 
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, 
-                                        void *oldval,
-                                        void *newval)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, 
+                                      void *oldval,
+                                      void *newval)
 {
     void* prev;
 #ifndef __x86_64__ 
@@ -147,7 +147,7 @@ static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a,
                          : "q"(newval), "m"(a->value), "0"(oldval)
                          : "memory");
 #endif
-    return prev;
+    return prev==oldval;
 }
 
 #endif /* end of check for gcc intrinsics */
index bce1ad28c39ac293f9e06806e4bd68f7805fff11..567b58c78af72c3678d4e866e56fcd876fe4c9da 100644 (file)
@@ -81,7 +81,7 @@ static inline int tMPI_Atomic_cas(tMPI_Atomic_t *   a,
     ret = _Asm_cmpxchg((_Asm_sz)SZ_W,(_Asm_sem)_SEM_ACQ,(Uint32*)a,    
                        (Uint32)newval,(_Asm_ldhint)_LDHINT_NONE);
                    
-    return ret;
+    return ret==oldval;
 }
 
 
@@ -101,7 +101,7 @@ static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *  a,
     ret = _Asm_cmpxchg((_Asm_sz)SZ_W,(_Asm_sem)_SEM_ACQ,(Uint64)a,    
                        (Uint64)newval,(_Asm_ldhint)_LDHINT_NONE);
                    
-    return ret;
+    return ret==oldval;
 }
 
 
@@ -138,7 +138,7 @@ static inline void tMPI_Atomic_add_return(tMPI_Atomic_t *       a,
             oldval = tMPI_Atomic_get(a);
             newval = oldval + i;
         }
-        while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+        while(!tMPI_Atomic_cas(a,oldval,newval));
     }
     return newval;
 }
@@ -169,7 +169,7 @@ static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *     a,
             oldval = tMPI_Atomic_get(a);
             newval = oldval + i;
         }
-        while(tMPI_Atomic_cas(a,oldval,newval) != oldval);
+        while(!tMPI_Atomic_cas(a,oldval,newval));
     }
     return oldval;
 }
index 6fa1b7952607589b2a41f96c64cd71444801b67e..abfb7ee5f4d2efa228455e73e717b6704d70c68d 100644 (file)
@@ -88,11 +88,11 @@ typedef struct tMPI_Spinlock
     ( (i) + InterlockedExchangeAdd((LONG volatile *)(a), (LONG) (i)) )
 
 #define tMPI_Atomic_cas(a, oldval, newval) \
-    InterlockedCompareExchange((LONG volatile *)(a), (LONG) (newval), (LONG) (oldval))
+    (InterlockedCompareExchange((LONG volatile *)(a), (LONG) (newval), (LONG) (oldval)) == (LONG)oldval)
 
 #define tMPI_Atomic_ptr_cas(a, oldval, newval) \
-    InterlockedCompareExchangePointer(&((a)->value), (PVOID) (newval),  \
-                                      (PVOID) (oldval))
+    (InterlockedCompareExchangePointer(&((a)->value), (PVOID) (newval),  \
+                                      (PVOID) (oldval)) == (PVOID)oldval)
 
 #define tMPI_Atomic_swap(a, b) \
     InterlockedExchange((LONG volatile *)(a), (LONG) (b))
diff --git a/include/thread_mpi/atomic/xlc_aix.h b/include/thread_mpi/atomic/xlc_aix.h
deleted file mode 100644 (file)
index 7a3aa3d..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
-This source code file is part of thread_mpi.  
-Written by Sander Pronk, Erik Lindahl, and possibly others. 
-
-Copyright (c) 2009, Sander Pronk, Erik Lindahl.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1) Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-2) Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in the
-   documentation and/or other materials provided with the distribution.
-3) Neither the name of the copyright holders nor the
-   names of its contributors may be used to endorse or promote products
-   derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-If you want to redistribute modifications, please consider that
-scientific software is very special. Version control is crucial -
-bugs must be traceable. We will be happy to consider code for
-inclusion in the official distribution, but derived work should not
-be called official thread_mpi. Details are found in the README & COPYING
-files.
-*/
-
-/* IBM xlC compiler on AIX */
-#include <sys/atomic_op.h>
-
-
-#define tMPI_Atomic_memory_barrier()
-
-
-typedef struct tMPI_Atomic
-{
-    volatile int value;  /*!< Volatile, to avoid compiler aliasing */
-}
-tMPI_Atomic_t;
-
-
-typedef struct tMPI_Atomic_ptr
-{
-    void* volatile* value;  /*!< Volatile, to avoid compiler aliasing */
-}
-tMPI_Atomic_ptr_t;
-
-
-typedef struct tMPI_Spinlock
-{
-    volatile unsigned int lock;  /*!< Volatile, to avoid compiler aliasing */
-}
-tMPI_Spinlock_t;
-
-
-static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
-{
-    int t;
-    
-    if(__check_lock((atomic_p)&a->value, oldval, newval))
-    {
-        /* Not successful - value had changed in memory. Reload value. */
-        t = a->value;
-    }
-    else
-    {
-        /* replacement suceeded */
-        t = oldval;
-    }
-    return t;        
-}
-
-
-static inline void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void* oldval,
-                                        void* newval)
-{
-    void *t;
-    
-    if(__check_lock((atomic_p)&a->value, oldval, newval))
-    {
-        /* Not successful - value had changed in memory. Reload value. */
-        t = a->value;
-    }
-    else
-    {
-        /* replacement suceeded */
-        t = oldval;
-    }
-    return t;        
-}
-
-
-
-
-static inline void tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
-{
-    int oldval,newval;    
-    
-    do
-    {
-        oldval = tMPI_Atomic_get(a);
-        newval = oldval + i;
-    }
-    while(__check_lock((atomic_p)&a->value, oldval, newval));
-
-    return newval;
-}
-
-
-
-static inline void tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
-{
-    int oldval,newval;    
-    
-    do
-    {
-        oldval = tMPI_Atomic_get(a);
-        newval = oldval + i;
-    }
-    while(__check_lock((atomic_p)&a->value, oldval, newval));
-    
-    return oldval;
-}
-
-
-static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
-{
-    __clear_lock((atomic_p)x,0);
-}
-
-
-static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
-{
-    do
-    {
-        ;
-    }
-    while(__check_lock((atomic_p)x, 0, 1));
-}
-
-
-static inline void tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
-{
-    /* Return 0 if we got the lock */
-    return (__check_lock((atomic_p)x, 0, 1) != 0)
-}
-
-
-static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
-{
-    __clear_lock((atomic_p)x,0);
-}
-
-
-static inline void tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
-{
-    return (*((atomic_p)x) != 0);
-}
-
-
-static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
-{
-    while(spin_islocked(x)) { ; } 
-}
-
-
index 214a3a30258569ea9a48fe12f75f5f276ac5fd14..a40cbe485ac9b38c7f003b8cf66d177faefd8804 100644 (file)
@@ -44,54 +44,132 @@ files.
  *
  * For now, we just disable the inline keyword if we're compiling C code:
  */
+#if 1
 #if (!defined(__cplusplus)) && (!defined(inline))
 #define inline_defined_in_atomic 1
 #define inline
 #endif
+#endif
+
+
+/* IBM xlC compiler */
+#ifdef __cplusplus
+#include <builtins.h>
+#endif
 
 
-#define tMPI_Atomic_memory_barrier()  { __asm__ __volatile__("\t isync\n"\
-                                                             : : :"memory" ); }
+#define TMPI_XLC_INTRINSICS
 
+/* ppc has many memory synchronization instructions */
+/*#define tMPI_Atomic_memory_barrier() __sync();*/
+/*#define tMPI_Atomic_memory_barrier() __isync();*/
+/*#define tMPI_Atomic_memory_barrier() __lwsync();*/
 
+/* for normal memory, this should be enough: */
+#define tMPI_Atomic_memory_barrier() __eieio();
 
 
 typedef struct tMPI_Atomic
 {
-    volatile int value;  /*!< Volatile, to avoid compiler aliasing */
+    int value __attribute__ ((aligned(64)));  
 }
 tMPI_Atomic_t;
 
 
 typedef struct tMPI_Atomic_ptr
 {
-    void* volatile *value;  /*!< Volatile, to avoid compiler aliasing */
+    volatile char* volatile* value __attribute__ ((aligned(64)));  /*!< Volatile, to avoid compiler aliasing */
 }
 tMPI_Atomic_ptr_t;
 
 
-
 typedef struct tMPI_Spinlock
 {
-    volatile unsigned int lock;  /*!< Volatile, to avoid compiler aliasing */
+    int lock __attribute__ ((aligned(64)));  
 }
 tMPI_Spinlock_t;
 
 
+#define tMPI_Atomic_get(a)   (int)((a)->value) 
+#define tMPI_Atomic_set(a,i)  (((a)->value) = (i))
+#define tMPI_Atomic_ptr_get(a)   ((a)->value) 
+#define tMPI_Atomic_ptr_set(a,i)  (((a)->value) = (i))
+
 #define TMPI_SPINLOCK_INITIALIZER   { 0 }
 
 
-#define tMPI_Atomic_get(a)   (int)((a)->value) 
-#define tMPI_Atomic_set(a,i)  (((a)->value) = (i))
-#define tMPI_Atomic_ptr_get(a)   (void*)((a)->value) 
-#define tMPI_Atomic_ptr_set(a,i)  (((a)->value) = (void*)(i))
+static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
+{
+#ifdef TMPI_XLC_INTRINSICS
+    int ret;
+
+    __eieio(); /* these memory barriers are necessary */
+    __fence(); /* and this one needs to be here to avoid aliasing issues */
+    ret=(__compare_and_swap(&(a->value), &oldval, newval));
+    __isync();
+    __fence(); /* and this one needs to be here to avoid aliasing issues */
+    return ret;
+#else
+    int prev;
+    __asm__ __volatile__ ("1:    lwarx   %0,0,%2 \n"
+                          "\t cmpw    0,%0,%3 \n"
+                          "\t bne     2f \n"
+                          "\t stwcx.  %4,0,%2 \n"
+                          "\t bne-    1b \n"
+                          "\t sync \n"
+                          "2: \n"
+                          : "=&r" (prev), "=m" (a->value)
+                          : "r" (&a->value), "r" (oldval), "r" (newval),
+                          "m" (a->value));
+
+    return prev==oldval;
+#endif
+}
 
 
-static int tMPI_Atomic_add_return(tMPI_Atomic_t *    a, 
-                                        int               i)
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *a, void* oldval,
+                                      void* newval)
 {
-    int t;
+    int ret;
+    volatile char* volatile* oldv=oldval;
+    volatile char* volatile* newv=newval;
+
+    __eieio(); /* these memory barriers are necessary */
+    __fence(); /* and this one needs to be here to avoid aliasing issues */
+#if (!defined (__LP64__) ) && (!defined(__powerpc64__) ) 
+    ret=__compare_and_swap((int *)&(a->value), (int*)&oldv, (int)newv);
+#else
+    ret=__compare_and_swaplp((long *)&(a->value), (long*)&oldv, (long)newv);
+#endif
+    __isync();
+    __fence();
+
+    return ret;
+}
+
+
+
+
+static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, int i)
+{
+#ifdef TMPI_XLC_INTRINSICS
+    int oldval, newval;
     
+    do
+    {
+        __eieio(); /* these memory barriers are necessary */
+        oldval = tMPI_Atomic_get(a);
+        newval = oldval + i;
+    }
+    /*while(!__compare_and_swap( &(a->value), &oldval, newval));*/
+    while(__check_lock_mp( &(a->value), oldval, newval));
+
+    __isync();
+
+    return newval;
+#else
+    int t;
+
     __asm__ __volatile__("1:     lwarx   %0,0,%2 \n"
                          "\t add     %0,%1,%0 \n"
                          "\t stwcx.  %0,0,%2 \n"
@@ -100,145 +178,84 @@ static int tMPI_Atomic_add_return(tMPI_Atomic_t *    a,
                          : "=&r" (t)
                          : "r" (i), "r" (&a->value) );
     return t;
+#endif
 }
 
 
 
-static int tMPI_Atomic_fetch_add(tMPI_Atomic_t *     a,
-                                       int                i)
+static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
 {
-    int t;
+#ifdef TMPI_XLC_INTRINSICS
+    int oldval,newval;
     
+    do
+    {
+        __eieio(); /* these memory barriers are necessary */
+        oldval = tMPI_Atomic_get(a);
+        newval = oldval + i;
+    }
+    /*while(__check_lock_mp((const int*)&(a->value), oldval, newval));*/
+    while(__check_lock_mp( &(a->value), oldval, newval));
+    /*while(!__compare_and_swap( &(a->value), &oldval, newval));*/
+    __isync();
+
+    return oldval;
+#else
+    int t;
+
     __asm__ __volatile__("\t eieio\n"
-                         "1:     lwarx   %0,0,%2 \n"                         
+                         "1:     lwarx   %0,0,%2 \n"
                          "\t add     %0,%1,%0 \n"
                          "\t stwcx.  %0,0,%2 \n"
                          "\t bne-    1b \n"
                          "\t isync \n"
                          : "=&r" (t)
                          : "r" (i), "r" (&a->value));
-    
-    return (t - i);    
-}
-
-
-static int tMPI_Atomic_cas(tMPI_Atomic_t *       a,
-                                     int                  oldval,
-                                     int                  newval)
-{
-    int prev;
-    
-    __asm__ __volatile__ ("1:    lwarx   %0,0,%2 \n"
-                          "\t cmpw    0,%0,%3 \n"
-                          "\t bne     2f \n"
-                          "\t stwcx.  %4,0,%2 \n"
-                          "\t bne-    1b \n"
-                          "\t sync \n"
-                          "2: \n"
-                          : "=&r" (prev), "=m" (a->value)
-                          : "r" (&a->value), "r" (oldval), "r" (newval), 
-                            "m" (a->value));
-    
-    return prev;
-}
-
-static void* tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t *   a,
-                                           void*                oldval,
-                                           void*                newval)
-{
-    void* prev;
-   
 
-#if (!defined(__PPC64__)) && (!defined(__ppc64))
-    __asm__ __volatile__ ("1:    lwarx   %0,0,%2 \n"
-                          "\t cmpw    0,%0,%3 \n"
-                          "\t bne     2f \n"
-                          "\t stwcx.  %4,0,%2 \n"
-                          "\t bne-    1b \n"
-                          "\t sync \n"
-                          "2: \n"
-                          : "=&r" (prev), "=m" (a->value)
-                          : "r" (&a->value), "r" (oldval), "r" (newval), 
-                            "m" (a->value));
-    
-#else
-    __asm__ __volatile__ ("1:    ldarx   %0,0,%2 \n"
-                          "\t cmpd    0,%0,%3 \n"
-                          "\t bne     2f \n"
-                          "\t stdcx.  %4,0,%2 \n"
-                          "\t bne-    1b \n"
-                          "\t sync \n"
-                          "2: \n"
-                          : "=&r" (prev), "=m" (a->value)
-                          : "r" (&a->value), "r" (oldval), "r" (newval), 
-                            "m" (a->value));
+    return (t - i);
 #endif
-    return prev;
 }
 
 
-static void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
+static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
 {
-    x->lock = 0;
+    __clear_lock_mp((const int*)x,0);
 }
 
 
-
-static void tMPI_Spinlock_lock(tMPI_Spinlock_t *  x)
+static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
 {
-    unsigned int tmp;
-    
-    __asm__ __volatile__("\t b      1f \n"
-                         "2:      lwzx    %0,0,%1 \n"
-                         "\t cmpwi   0,%0,0 \n"
-                         "\t bne+    2b \n"
-                         "1:      lwarx   %0,0,%1 \n"
-                         "\t cmpwi   0,%0,0 \n"
-                         "\t bne-    2b \n"
-                         "\t stwcx.  %2,0,%1 \n"
-                         "\t bne-    2b \n"
-                         "\t isync\n"
-                         : "=&r"(tmp)
-                         : "r"(&x->lock), "r"(1));
+    do
+    {
+        tMPI_Atomic_memory_barrier();
+    }
+    while(__check_lock_mp(&(x->lock), 0, 1));
 }
 
 
-static int tMPI_Spinlock_trylock(tMPI_Spinlock_t *  x)
+static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
 {
-    unsigned int old, t;
-    unsigned int mask = 1;
-    volatile unsigned int *p = &x->lock;
-    
-    __asm__ __volatile__("\t eieio\n"
-                         "1:      lwarx   %0,0,%4 \n"
-                         "\t or      %1,%0,%3 \n"
-                         "\t stwcx.  %1,0,%4 \n"
-                         "\t bne     1b \n"
-                         "\t sync \n"
-                         : "=&r" (old), "=&r" (t), "=m" (*p)
-                         : "r" (mask), "r" (p), "m" (*p));
-    
-    return ((old & mask) != 0);    
+    /* Return 0 if we got the lock */
+    return (__check_lock_mp(&(x->lock), 0, 1) != 0);
 }
 
 
-static void tMPI_Spinlock_unlock(tMPI_Spinlock_t *  x)
+static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
 {
-    __asm__ __volatile__("\t eieio \n");
-    x->lock = 0;
+    __clear_lock_mp(&(x->lock),0);
 }
 
 
-static int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *   x)
+static inline int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
 {
-    return ( x->lock != 0);
+    tMPI_Atomic_memory_barrier();
+    return ((x->lock) != 0);
 }
 
 
-static void tMPI_Spinlock_wait(tMPI_Spinlock_t *   x)
+static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
 {
-    
-    do 
+    do
     {
         tMPI_Atomic_memory_barrier();
     }
@@ -246,5 +263,3 @@ static void tMPI_Spinlock_wait(tMPI_Spinlock_t *   x)
 }
 
 
-
-
index 2ce38d7fea785db69a1a9bae1fb30485869070af..3ad3d4a931f8163f98330828d5c26133078439d8 100644 (file)
@@ -55,7 +55,6 @@ man_MANS =   \
        g_rotmat.1  \
        g_saltbr.1  \
        g_sas.1  \
-       g_sdf.1  \
        g_select.1  \
        g_sgangle.1  \
        g_sham.1  \
index 1b2bfc016f6f7f3d80ed53a9f869d622913beb82..67deebc9fb158e7121fca94a326a5256ceeacb83 100644 (file)
@@ -183,7 +183,7 @@ static void tMPI_Coll_envt_init(struct coll_env_thread *met, int N)
     tMPI_Atomic_set(&(met->n_remaining), 0);
     met->buf=(void**)tMPI_Malloc(sizeof(void*)*N);
     met->bufsize=(size_t*)tMPI_Malloc(sizeof(size_t)*N);
-    met->read_data=(gmx_bool*)tMPI_Malloc(sizeof(gmx_bool)*N);
+    met->read_data=(tmpi_bool*)tMPI_Malloc(sizeof(tmpi_bool)*N);
 #ifdef USE_COLLECTIVE_COPY_BUFFER
     met->cpbuf=(tMPI_Atomic_ptr_t*)tMPI_Malloc(sizeof(tMPI_Atomic_ptr_t)*N);
     met->cb=NULL;
@@ -322,7 +322,7 @@ static void tMPI_Mult_recv(tMPI_Comm comm, struct coll_env *cev, int rank,
     {
         void *srcbuf;
 #ifdef USE_COLLECTIVE_COPY_BUFFER
-        gmx_bool decrease_ctr=FALSE;
+        tmpi_bool decrease_ctr=FALSE;
 #endif
 
         if ( sendsize > recvsize ) 
@@ -442,7 +442,7 @@ static void tMPI_Post_multi(struct coll_env *cev, int myrank, int index,
     int i;
 #ifdef USE_COLLECTIVE_COPY_BUFFER
     /* decide based on the number of waiting threads */
-    gmx_bool using_cb=(bufsize < (size_t)(n_remaining*COPY_BUFFER_SIZE));
+    tmpi_bool using_cb=(bufsize < (size_t)(n_remaining*COPY_BUFFER_SIZE));
 
     cev->met[myrank].using_cb=using_cb;
     if (using_cb)
@@ -525,8 +525,8 @@ static void tMPI_Wait_for_others(struct coll_env *cev, int myrank)
            be double-buffering) so we always spin here. */
         tMPI_Atomic_memory_barrier();
 #if 0
-        while (tMPI_Atomic_cas( &(cev->met[rank].buf_readcount), 0,
-                                    -100000) != 0)
+        while (!tMPI_Atomic_cas( &(cev->met[rank].buf_readcount), 0,
+                                    -100000))
 #endif
 #if 1
         while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0) 
index 04106eced860851972147ac6baeafef838f066cd..99747beac91a127922cb8696e1b5fc06d37d93dd 100644 (file)
@@ -126,7 +126,7 @@ int tMPI_Comm_compare(tMPI_Comm comm1, tMPI_Comm comm2, int *result)
     {
         if (comm1->grp.peers[i] != comm2->grp.peers[i])
         {
-            gmx_bool found=FALSE;
+            tmpi_bool found=FALSE;
 
             *result=TMPI_SIMILAR;
             for(j=0;j<comm2->grp.N;j++)
@@ -172,32 +172,6 @@ tMPI_Comm tMPI_Comm_alloc(tMPI_Comm parent, int N)
     /* initialize the main barrier */
     tMPI_Barrier_init(&(ret->barrier), N);
 
-#if 0
-    {
-        /* calculate the number of reduce barriers */
-        int Nbarriers=0;
-        int Nred=N;
-        while(Nred>1) {
-            Nbarriers+=1;
-            Nred = Nred/2 + Nred%2;
-        } 
-
-        ret->Nreduce_barriers=Nbarriers;
-        ret->reduce_barrier=(tMPI_Barrier_t*)
-                  tMPI_Malloc(sizeof(tMPI_Barrier_t)*(Nbarriers+1));
-        ret->N_reduce_barrier=(int*)tMPI_Malloc(sizeof(int)*(Nbarriers+1));
-        Nred=N;
-        for(i=0;i<Nbarriers;i++)
-        {
-            tMPI_Barrier_init( &(ret->reduce_barrier[i]), Nred);
-            ret->N_reduce_barrier[i]=Nred;
-            /* Nred is now Nred/2 + a rest term because solitary 
-               process at the end of the list must still be accounter for */
-            Nred = Nred/2 + Nred%2;
-        }
-    }
-#endif
-
     /* the reduce barriers */
     {
         /* First calculate the number of reduce barriers */
@@ -397,7 +371,7 @@ static void tMPI_Split_colors(int N, const int *color, const int *key,
                               int *group)
 {
     int i,j;
-    gmx_bool found;
+    tmpi_bool found;
 
     /* reset groups */
     for(i=0;i<N;i++)
@@ -450,7 +424,7 @@ int tMPI_Comm_split(tMPI_Comm comm, int color, int key, tMPI_Comm *newcomm)
                                                 the threads actually suplies 
                                                 these arrays to the comm 
                                                 structure) */
-    gmx_bool i_am_first=FALSE;
+    tmpi_bool i_am_first=FALSE;
     int myrank=tMPI_Comm_seek_rank(comm, tMPI_Get_current());
     struct tmpi_split *spl;
 
index fa2bf0cb021d5c3889906906b36f30744900065f..b60d6d46b0efb65132a7da6393083e8b0640b37a 100644 (file)
@@ -60,7 +60,7 @@ files.
 
 /* Group query & manipulation functions */
 
-gmx_bool tMPI_In_group(tMPI_Group group)
+tmpi_bool tMPI_In_group(tMPI_Group group)
 {
     int i;
     struct tmpi_thread *cur;
index 0ae59008631390026ab0acd4e5c1897502108e36..e5f9e43b593085cabff467a3b102c90822867216 100644 (file)
@@ -46,7 +46,7 @@ files.
 #endif
 
 #ifdef HAVE_SYS_TIME_H
-#include <unistd.h>
+#include <sys/time.h>
 #endif
 
 #include <errno.h>
@@ -77,21 +77,9 @@ files.
 **************************************************************************/
 
 
-
-#ifndef __cplusplus
-typedef int gmx_bool;
+typedef int tmpi_bool;
 #define TRUE 1
 #define FALSE 0
-#else
-#ifndef TRUE
-#define TRUE true
-#endif
-#ifndef FALSE
-#define FALSE false
-#endif
-#endif
-
-
 
 
 
@@ -190,7 +178,7 @@ struct envelope
     size_t bufsize; /* the size of the data to be transmitted */
     tMPI_Datatype datatype; /* the data type */
 
-    gmx_bool nonblock; /* whether the receiver is non-blocking */
+    tmpi_bool nonblock; /* whether the receiver is non-blocking */
 
     /* state, values from enum_envelope_state .  
        (there's a few busy-waits relying on this flag). 
@@ -206,10 +194,10 @@ struct envelope
     /* prev and next envelopes in the send/recv_envelope_list linked list  */
     struct envelope *prev,*next;
 
-    gmx_bool send; /* whether this is a send envelope (if TRUE), or a receive 
+    tmpi_bool send; /* whether this is a send envelope (if TRUE), or a receive 
                   envelope (if FALSE) */
 #ifdef USE_SEND_RECV_COPY_BUFFER
-    gmx_bool using_cb; /* whether a copy buffer is (going to be) used */
+    tmpi_bool using_cb; /* whether a copy buffer is (going to be) used */
     void* cb;/* the allocated copy buffer pointer */
 #endif
     /* the next and previous envelopes in the request list */
@@ -272,7 +260,7 @@ struct recv_envelope_list
 /* the request object for asynchronious operations. */
 struct tmpi_req_
 {
-    gmx_bool finished; /* whether it's finished */
+    tmpi_bool finished; /* whether it's finished */
     struct envelope *ev; /* the envelope */
 
     struct tmpi_thread *source; /* the message source (for receives) */
@@ -280,7 +268,7 @@ struct tmpi_req_
     int tag; /* the tag */
     int error; /* error code */
     size_t transferred; /* the number of transferred bytes */
-    gmx_bool cancelled; /* whether the transmission was canceled */
+    tmpi_bool cancelled; /* whether the transmission was canceled */
 
     struct tmpi_req_ *next,*prev; /* next,prev request in linked list,
                                      used in the req_list, but also in 
@@ -346,7 +334,7 @@ struct coll_env_thread
     size_t *bufsize; /* array of number of bytes to send/recv */
 
 #ifdef USE_COLLECTIVE_COPY_BUFFER
-    gmx_bool using_cb; /* whether a copy buffer is (going to be) used */
+    tmpi_bool using_cb; /* whether a copy buffer is (going to be) used */
     tMPI_Atomic_t buf_readcount; /* Number of threads reading from buf
                                     while using_cpbuf is true, but cpbuf 
                                     is still NULL.  */
@@ -359,7 +347,7 @@ struct coll_env_thread
                            and the coll_env_thread is ready for re-use. */
     tMPI_Event recv_ev; /* event associated with being a receiving thread. */
 
-    gmx_bool *read_data; /* whether we read data from a specific thread. */
+    tmpi_bool *read_data; /* whether we read data from a specific thread. */
 };
 
 /* Collective communications once sync. These run in parallel with
@@ -608,7 +596,7 @@ struct tmpi_split
 { 
     volatile int Ncol_init;
     volatile int Ncol_destroy;
-    volatile gmx_bool can_finish;
+    volatile tmpi_bool can_finish;
     volatile int *colors;
     volatile int *keys;
 };
@@ -655,7 +643,7 @@ struct tmpi_datatype_
     tMPI_Op_fn *op_functions; /* array of op functions for this datatype */
     int N_comp; /* number of components */
     struct tmpi_datatype_component *comps; /* the components */
-    gmx_bool committed; /* whether the data type is committed */
+    tmpi_bool committed; /* whether the data type is committed */
 };
 /* just as a shorthand:  */
 typedef struct tmpi_datatype_ tmpi_dt;
@@ -736,9 +724,9 @@ int tMPI_Error(tMPI_Comm comm, int tmpi_errno);
 
 
 /* check whether we're the main thread */
-gmx_bool tMPI_Is_master(void);
+tmpi_bool tMPI_Is_master(void);
 /* check whether the current process is in a group */
-gmx_bool tMPI_In_group(tMPI_Group group);
+tmpi_bool tMPI_In_group(tMPI_Group group);
 
 /* find the rank of a thread in a comm */
 int tMPI_Comm_seek_rank(tMPI_Comm comm, struct tmpi_thread *th);
index 0a8efcf20637a36cd63ce027e3a4e3b8b7011d46..24634d03011e5bcdb56be113d21b81cdd6705efe 100644 (file)
@@ -43,6 +43,18 @@ files.
 #include "config.h"
 #endif
 
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
 
 #include "thread_mpi/list.h"
 
@@ -65,7 +77,7 @@ void tMPI_Stack_push(tMPI_Stack *st, tMPI_Stack_element *el)
         head=(tMPI_Stack_element*)tMPI_Atomic_ptr_get( &(st->head) );
         el->next=head;
     }
-    while (tMPI_Atomic_ptr_cas(&(st->head), head, el)!=(void*)head);
+    while (!tMPI_Atomic_ptr_cas(&(st->head), head, el));
 }
 
 tMPI_Stack_element *tMPI_Stack_pop(tMPI_Stack *st)
@@ -78,7 +90,7 @@ tMPI_Stack_element *tMPI_Stack_pop(tMPI_Stack *st)
             next=head->next;
         else
             next=NULL;
-    } while (tMPI_Atomic_ptr_cas(&(st->head), head, next)!=(void*)head);
+    } while (!tMPI_Atomic_ptr_cas(&(st->head), head, next));
 
     return head;
 }
@@ -89,7 +101,7 @@ tMPI_Stack_element *tMPI_Stack_detach(tMPI_Stack *st)
     do
     {
         head=(tMPI_Stack_element*)tMPI_Atomic_ptr_get( &(st->head) );
-    } while (tMPI_Atomic_ptr_cas(&(st->head), head, NULL)!=(void*)head);
+    } while (!tMPI_Atomic_ptr_cas(&(st->head), head, NULL));
 
     return head;
 }
@@ -118,7 +130,7 @@ void tMPI_Queue_enqueue(tMPI_Queue *q, tMPI_Queue_element *qe)
 
     do
     {
-    } while (tMPI_Atomic_ptr_cas(&(q->head), head, 
+    } while (!tMPI_Atomic_ptr_cas(&(q->head), head, next));
 }
 #endif
 
index 7b2fa10382a61abc70c9adea7bb1b4a0cc023525..9d3d4877de7f005809078ad66eadab79b326deea 100644 (file)
@@ -84,7 +84,7 @@ int tMPI_Once(tMPI_Comm comm, void (*function)(void*), void *param,
     if ((csync->syncs - syncs > 0) && /* check if sync was an earlier number. 
                                          If it is a later number, we can't 
                                          have been the first to arrive here. */
-        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs)==syncs)
+        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
     {
         /* we're the first! */
         function(param);
@@ -124,7 +124,7 @@ void* tMPI_Once_wait(tMPI_Comm comm, void* (*function)(void*), void *param,
                                          Calculating the difference instead
                                          of comparing directly avoids ABA 
                                          problems. */
-        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs)==syncs)
+        tMPI_Atomic_cas(&(cev->coll.current_sync), syncs, csync->syncs))
     {
         /* we're the first! */
         ret=function(param);
index 678903893743dd27399034279f853de64adb30a9..c2835146f8b2c09ba81e23128679602070c3fa40 100644 (file)
@@ -131,14 +131,15 @@ static void tMPI_Xfer(struct tmpi_thread *cur, struct envelope *sev,
 
 
 /* check for the completion of a single request */
-static gmx_bool tMPI_Test_single(struct tmpi_thread *cur, struct tmpi_req_ *rq);
+static tmpi_bool tMPI_Test_single(struct tmpi_thread *cur, 
+                                  struct tmpi_req_ *rq);
 /* check and wait for the completion of a single request */
 static void tMPI_Wait_single(struct tmpi_thread *cur, struct tmpi_req_ *rq);
 
 /* check for the completion of a NULL-delimited doubly linked list of 
    requests */
-static gmx_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
-                            gmx_bool *any_done);
+static tmpi_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
+                                 tmpi_bool *any_done);
 
 
 
index a197b95320a3882ba532aa7fa5afd197da91b733..733ef9e7be04772a02d38d8489758fd1a08d81bd 100644 (file)
@@ -187,8 +187,7 @@ tMPI_Send_env_list_fetch_new(struct send_envelope_list *evl)
             /* we detach by swapping what we expect the pointer value to be,
                with NULL. If there were a cross-platform way to atomically 
                swap  without checking, we could do that, too. */
-            while(tMPI_Atomic_ptr_cas( &(evl->head_rts), ret, NULL ) !=
-                  (void*)ret)
+            while(!tMPI_Atomic_ptr_cas( &(evl->head_rts), ret, NULL ))
             {
                 ret=(struct envelope*)tMPI_Atomic_ptr_get(&(evl->head_rts));
             }
@@ -264,7 +263,7 @@ static void tMPI_Send_env_list_rts(struct envelope *sev)
         /* the cmpxchg operation is a memory fence, so we shouldn't need
            to worry about out-of-order evaluation */
     }
-    while (tMPI_Atomic_ptr_cas( &(evl->head_rts), sevn, sev ) != (void*)sevn);
+    while (!tMPI_Atomic_ptr_cas( &(evl->head_rts), sevn, sev ));
 #else
     tMPI_Spinlock_lock( &(evl->lock_rts) );
     ev->next=(struct envelope*)evl->head_rts;
@@ -294,7 +293,6 @@ static void tMPI_Send_env_list_add_new(struct tmpi_thread *cur,
 {
 #ifdef TMPI_LOCK_FREE_LISTS
     struct envelope *evl_head_new_orig;
-    struct envelope *evl_cas;
 #endif
     sev->prev=NULL;
 
@@ -312,12 +310,10 @@ static void tMPI_Send_env_list_add_new(struct tmpi_thread *cur,
         sev->next=evl_head_new_orig;
         /* do the compare-and-swap. 
            this operation is a memory fence, so we shouldn't need
-           to worry about out-of-order stores */
-        evl_cas=(struct envelope*)tMPI_Atomic_ptr_cas(&(evl->head_new), 
-                                                      evl_head_new_orig, sev);
-        /* and compare the results: if they aren't the same,
+           to worry about out-of-order stores. If it returns false, 
            somebody else got there before us: */
-    } while (evl_cas != evl_head_new_orig); 
+    } while (!tMPI_Atomic_ptr_cas(&(evl->head_new), evl_head_new_orig, sev));
+
 #else
     tMPI_Spinlock_lock( &(evl->lock_new) );
     /* we add to the start of the list */
@@ -524,8 +520,8 @@ static void tMPI_Set_status(struct tmpi_req_ *req, tMPI_Status *st)
 }
 
 
-static gmx_bool tMPI_Envelope_matches(const struct envelope *sev,
-                                  const struct envelope *rev)
+static tmpi_bool tMPI_Envelope_matches(const struct envelope *sev,
+                                       const struct envelope *rev)
 {
 #ifdef TMPI_DEBUG
     printf("%5d: tMPI_Envelope_matches (%d->%d)==(%d->%d),  tag=(%d==%d),       \n       datatype=(%ld==%ld), comm=(%ld,%ld),\n              finished=(%d==%d)\n",
@@ -618,8 +614,7 @@ static void tMPI_Send_copy_buffer(struct envelope *sev, struct tmpi_req_ *req)
         /* first copy */
         memcpy(sev->cb, sev->buf, sev->bufsize);
         /* now set state, if other side hasn't started copying yet. */
-        if (tMPI_Atomic_cas( &(sev->state), env_unmatched, env_cb_available)
-            == env_unmatched)
+        if (tMPI_Atomic_cas( &(sev->state), env_unmatched, env_cb_available))
         {
             /* if it was originally unmatched, the receiver wasn't 
                copying the old buffer. We can don't need to wait,
@@ -660,7 +655,7 @@ static void tMPI_Send_copy_buffer(struct envelope *sev, struct tmpi_req_ *req)
 static struct envelope* tMPI_Prep_send_envelope(struct send_envelope_list *evl, 
                         tMPI_Comm comm, struct tmpi_thread *src, 
                         struct tmpi_thread *dest, void *buf, int count, 
-                        tMPI_Datatype datatype, int tag, gmx_bool nonblock)
+                        tMPI_Datatype datatype, int tag, tmpi_bool nonblock)
 {
     /* get an envelope from the send-envelope stack */
     struct envelope *ev=tMPI_Send_env_list_fetch_new( evl );
@@ -702,7 +697,7 @@ static struct envelope* tMPI_Prep_send_envelope(struct send_envelope_list *evl,
 static struct envelope* tMPI_Prep_recv_envelope(struct tmpi_thread *cur, 
                         tMPI_Comm comm, struct tmpi_thread *src, 
                         struct tmpi_thread *dest, void *buf, int count, 
-                        tMPI_Datatype datatype, int tag, gmx_bool nonblock)
+                        tMPI_Datatype datatype, int tag, tmpi_bool nonblock)
 {
     /* get an envelope from the stack */
     struct envelope *ev=tMPI_Free_env_list_fetch_recv( &(cur->envelopes) );
@@ -749,7 +744,7 @@ static void tMPI_Xfer(struct tmpi_thread *cur, struct envelope *sev,
 #ifdef USE_SEND_RECV_COPY_BUFFER
     /* we remove the sender's envelope only if we do the transfer, which 
        we always do if the buffer size = 0 */ 
-    gmx_bool remove_sender = (sev->bufsize==0);
+    tmpi_bool remove_sender = (sev->bufsize==0);
 #endif
 #ifdef TMPI_DEBUG
     printf("%5d: tMPI_Xfer (%d->%d, tag=%d) started\n", 
@@ -778,8 +773,7 @@ static void tMPI_Xfer(struct tmpi_thread *cur, struct envelope *sev,
         if (sev->using_cb)
         {
             /* check if the other side has already finished copying */
-            if (tMPI_Atomic_cas( &(sev->state), env_unmatched, env_copying)
-                != env_unmatched)
+            if (!tMPI_Atomic_cas( &(sev->state), env_unmatched, env_copying))
             {
                 /* it has, and we're copying from the new buffer. 
                    We're now also tasked with removing the envelope */
@@ -853,7 +847,7 @@ static struct envelope* tMPI_Post_match_recv(struct tmpi_thread *cur,
                                              struct tmpi_thread *src, 
                                              void *recv_buf, int recv_count, 
                                              tMPI_Datatype datatype, 
-                                             int tag, gmx_bool nonblock)
+                                             int tag, tmpi_bool nonblock)
 {
     struct tmpi_thread *dest=cur;
     struct envelope *rev;
@@ -922,7 +916,7 @@ static struct envelope *tMPI_Post_send(struct tmpi_thread *cur,
                                        struct tmpi_thread *dest, 
                                        void *send_buf, int send_count,
                                        tMPI_Datatype datatype, int tag, 
-                                       gmx_bool nonblock)
+                                       tmpi_bool nonblock)
 {
     struct tmpi_thread *src=cur;
     struct envelope *sev;
@@ -983,18 +977,14 @@ static void tMPI_Wait_process_incoming(struct tmpi_thread *cur)
 #ifdef TMPI_LOCK_FREE_LISTS
             /* Behold our lock-free shared linked list:
                (see tMPI_Send_env_list_add_new for more info) */
-            struct envelope *evl_cas;
-
             do
             {
                 /* read old head atomically */
                 sev_head=(struct envelope*)
                           tMPI_Atomic_ptr_get( &(cur->evs[i].head_new) );
                 /* do the compare-and-swap to detach the list */
-                evl_cas=(struct envelope*)
-                          tMPI_Atomic_ptr_cas(&(cur->evs[i].head_new), sev_head,
-                                              NULL);
-            } while (evl_cas != sev_head);
+            } while (!tMPI_Atomic_ptr_cas(&(cur->evs[i].head_new), sev_head,
+                                              NULL));
 #else
             tMPI_Spinlock_lock( &(cur->evs[i].lock_new) );
             sev_head=(struct send_envelope*)cur->evs[i].head_new;
@@ -1041,7 +1031,7 @@ static void tMPI_Wait_process_incoming(struct tmpi_thread *cur)
     tMPI_Event_process( &(cur->p2p_event), n_handled);
 }
 
-static gmx_bool tMPI_Test_single(struct tmpi_thread *cur, struct tmpi_req_ *rq)
+static tmpi_bool tMPI_Test_single(struct tmpi_thread *cur, struct tmpi_req_ *rq)
 {
     struct envelope *ev=rq->ev;
 
@@ -1091,10 +1081,10 @@ static void tMPI_Wait_single(struct tmpi_thread *cur, struct tmpi_req_ *rq)
     } while(TRUE);
 }
 
-static gmx_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
-                            gmx_bool *any_done)
+static tmpi_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
+                                 tmpi_bool *any_done)
 {
-    gmx_bool all_done=TRUE;
+    tmpi_bool all_done=TRUE;
     struct tmpi_req_ *creq=rqs;
 
     int i=0;
@@ -1103,7 +1093,7 @@ static gmx_bool tMPI_Test_multi(struct tmpi_thread *cur, struct tmpi_req_ *rqs,
 
     while(creq)
     {
-        gmx_bool finished=tMPI_Test_single(cur, creq);
+        tmpi_bool finished=tMPI_Test_single(cur, creq);
         i++;
 
         /* now do the check */
index b15a32bc6f5c0ae6dbbf83dd709d4042809e16c5..743030833e5acc580711c652ba5be1f7d2a4346d 100644 (file)
@@ -146,7 +146,7 @@ int tMPI_Test(tMPI_Request *request, int *flag, tMPI_Status *status)
    blocking = whether to block until all reqs are completed */
 static void tMPI_Test_multi_req(struct tmpi_thread *cur, 
                                 int count, tMPI_Request *array_of_requests,
-                                gmx_bool wait, gmx_bool blocking)
+                                tmpi_bool wait, tmpi_bool blocking)
 {
     int i;
     struct tmpi_req_ *first=NULL, *last=NULL;
index 49ae34033f8cc1712aa06758692cc61a8c840a5f..36a12236e0ead69bdca14ef649827b4120a7eb26 100644 (file)
@@ -149,14 +149,14 @@ int tMPI_Reduce_fast(void* sendbuf, void* recvbuf, int count,
                     /* for the first iteration, the inputs are in the 
                        sendbuf*/
                     a=sendbuf;
-                    b=tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[nbr]));
+                    b=(void*)tMPI_Atomic_ptr_get(&(comm->reduce_sendbuf[nbr]));
                 }
                 else
                 {
                     /* after the first operation, they're already in 
                        the recvbuf */
                     a=recvbuf;
-                    b=tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[nbr]));
+                    b=(void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[nbr]));
                 }
                 /* here we check for overlapping buffers */
                 if (a==b)
@@ -304,7 +304,7 @@ int tMPI_Allreduce(void* sendbuf, void* recvbuf, int count,
     tMPI_Profile_wait_stop(cur, TMPIWAIT_Reduce);
 #endif
     /* distribute rootbuf */
-    rootbuf=tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[0]));
+    rootbuf=(void*)tMPI_Atomic_ptr_get(&(comm->reduce_recvbuf[0]));
 
     /* and now we just copy things back. We know that the root thread 
        arrives last, so there's no point in using tMPI_Scatter with 
index 02e5c0921936747228ab46537add3b5ae74717c4..394b2f5a203ae8ee069a12208b1c1dabffb1cad2 100644 (file)
@@ -72,7 +72,7 @@ int tMPI_Scatter(void* sendbuf, int sendcount, tMPI_Datatype sendtype,
         size_t sendsize=sendtype->size*sendcount;
         size_t total_send_size=0;
 #ifdef USE_COLLECTIVE_COPY_BUFFER
-        gmx_bool using_cb;
+        tmpi_bool using_cb;
 #endif
 
         if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
@@ -207,7 +207,7 @@ int tMPI_Scatterv(void* sendbuf, int *sendcounts, int *displs,
         int i;
         size_t total_send_size=0;
 #ifdef USE_COLLECTIVE_COPY_BUFFER
-        gmx_bool using_cb;
+        tmpi_bool using_cb;
 #endif
 
         if (!sendbuf) /* don't do pointer arithmetic on a NULL ptr */
index a157ec23e8083b313de360e5d7496e2684e2caaa..97495326e98fbbb7bf2b4f0e5c62e075134ccfb8 100644 (file)
@@ -87,7 +87,7 @@ tMPI_Thread_key_t id_key; /* the key to get the thread id */
 
 /* whether MPI has finalized (we need this to distinguish pre-inited from
        post-finalized states */
-static gmx_bool tmpi_finalized=FALSE;
+static tmpi_bool tmpi_finalized=FALSE;
 
 /* misc. global information about MPI */
 struct tmpi_global *tmpi_global=NULL;
@@ -100,7 +100,7 @@ struct tmpi_global *tmpi_global=NULL;
 
 
 /* start N threads with argc, argv (used by tMPI_Init)*/
-void tMPI_Start_threads(gmx_bool main_returns, int N, int *argc, char ***argv, 
+void tMPI_Start_threads(tmpi_bool main_returns, int N, int *argc, char ***argv, 
                         void (*start_fn)(void*), void *start_arg,
                         int (*start_fn_main)(int, char**));
 
@@ -193,7 +193,7 @@ struct tmpi_thread *tMPI_Get_thread(tMPI_Comm comm, int rank)
 }
 #endif
 
-gmx_bool tMPI_Is_master(void)
+tmpi_bool tMPI_Is_master(void)
 {
     /* if there are no other threads, we're the main thread */
     if ( (!TMPI_COMM_WORLD) || TMPI_COMM_WORLD->grp.N==0)
@@ -201,7 +201,7 @@ gmx_bool tMPI_Is_master(void)
 
     /* otherwise we know this through thread specific data: */
     /* whether the thread pointer points to the head of the threads array */
-    return (gmx_bool)(tMPI_Get_current() == threads); 
+    return (tmpi_bool)(tMPI_Get_current() == threads); 
 }
 
 tMPI_Comm tMPI_Get_comm_self(void)
@@ -376,7 +376,7 @@ static void* tMPI_Thread_starter(void *arg)
 }
 
 
-void tMPI_Start_threads(gmx_bool main_returns, int N, int *argc, char ***argv, 
+void tMPI_Start_threads(tmpi_bool main_returns, int N, int *argc, char ***argv, 
                         void (*start_fn)(void*), void *start_arg,
                         int (*start_fn_main)(int, char**))
 {
index 121f6bcb6a8c7c25ac97eb2c988ec14d29dc7d15..5d1e76ade363ebf296d857aad95c988b961b778a 100644 (file)
@@ -275,7 +275,7 @@ int tMPI_Type_commit(tMPI_Datatype *datatype)
         struct tmpi_datatype_ *lt=tmpi_global->usertypes[i];
         if (lt->committed && lt->N_comp==dt->N_comp)
         {
-            gmx_bool found=TRUE;
+            tmpi_bool found=TRUE;
             for(j=0;j<lt->N_comp;j++)
             {
                 if ( (lt->comps[j].type  != dt->comps[j].type) ||
@@ -293,7 +293,7 @@ int tMPI_Type_commit(tMPI_Datatype *datatype)
     }
     if (dt != *datatype)
     {
-        gmx_bool found=FALSE;
+        tmpi_bool found=FALSE;
         /* we remove the old one from the list */
         for(i=0;i<tmpi_global->N_usertypes;i++)
         {