/* we put all of these on their own cache line by padding the data structure
to the size of a cache line on x86 (64 bytes): */
+#define TMPI_SIZEOF_X86_CACHE_LINE 64
typedef struct tMPI_Atomic
{
int value;
- char padding[64-sizeof(int)];
+ char padding[TMPI_SIZEOF_X86_CACHE_LINE-sizeof(int)];
} tMPI_Atomic_t;
typedef struct tMPI_Atomic_ptr
{
void* value;
- char padding[64-sizeof(void*)];
+ char padding[TMPI_SIZEOF_X86_CACHE_LINE-sizeof(void*)];
} tMPI_Atomic_ptr_t;
typedef struct tMPI_Spinlock
{
unsigned int lock;
- char padding[64-sizeof(unsigned int)];
+ char padding[TMPI_SIZEOF_X86_CACHE_LINE-sizeof(unsigned int)];
} tMPI_Spinlock_t;
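A quick compile-time sanity check of the padding arithmetic can be useful; this is a hypothetical sketch, not part of the patch (the typedef names are illustrative), using the classic negative-array-size trick so it also works in C89:

/* hypothetical compile-time checks: a negative array size is a compile
   error, so each line fails to build if the padded type is not exactly
   one cache line */
typedef char tMPI_check_atomic_size[
    (sizeof(tMPI_Atomic_t)     == TMPI_SIZEOF_X86_CACHE_LINE) ? 1 : -1];
typedef char tMPI_check_atomic_ptr_size[
    (sizeof(tMPI_Atomic_ptr_t) == TMPI_SIZEOF_X86_CACHE_LINE) ? 1 : -1];
typedef char tMPI_check_spinlock_size[
    (sizeof(tMPI_Spinlock_t)   == TMPI_SIZEOF_X86_CACHE_LINE) ? 1 : -1];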
as the 486, and gcc on some Linux versions still targets 80386 by default).
We also specifically check for icc, because intrinsics are not always
- supported there. */
-#if ( (TMPI_GCC_VERSION >= 40100) && defined(__x86_64__) && \
- !defined(__INTEL_COMPILER) )
+ supported there.
+
+   llvm has issues with the inline assembly, but it does support the gcc
+   intrinsics, even in 32 bits */
+#if ( ( (TMPI_GCC_VERSION >= 40100) && defined(__x86_64__) && \
+ !defined(__INTEL_COMPILER) ) || defined(__llvm__) )
#include "gcc_intrinsics.h"
#else
__asm__ __volatile__("lock ; xaddl %0, %1;"
:"=r"(i) :"m"(a->value), "0"(i) : "memory");
return i + __i;
-}
+}
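The xaddl above leaves the previous value of a->value in i, so adding __i (the amount that was added) back on yields the value after the addition; tMPI_Atomic_fetch_add, whose definition starts below, instead returns the value from before the addition. A hypothetical illustration of the difference (assumes both functions are in scope; the direct .value access relies on the struct layout from the padding hunk above):

/* hypothetical illustration of the two return conventions */
static void tMPI_example_return_values(void)
{
    tMPI_Atomic_t count;
    int before, after;

    count.value = 3;
    before = tMPI_Atomic_fetch_add(&count, -1);  /* before == 3, count.value is now 2 */
    after  = tMPI_Atomic_add_return(&count, -1); /* after  == 1, count.value is now 1 */
    (void)before;
    (void)after;
}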
static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, int i)
{
static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
{
- unsigned int prev;
+ int prev;
__asm__ __volatile__("lock ; cmpxchgl %1,%2"
: "=a"(prev)
/* We tried again, and this time there was a copied buffer.
We use that, and indicate that we're not reading from the
regular buf. This case should be pretty rare. */
- tMPI_Atomic_fetch_add(&(cev->met[rank].buf_readcount),-1);
+ tMPI_Atomic_add_return(&(cev->met[rank].buf_readcount),-1);
tMPI_Atomic_memory_barrier_acq();
srcbuf=try_again_srcbuf;
}
{
/* we decrement the read count; potentially releasing the buffer. */
tMPI_Atomic_memory_barrier_rel();
- tMPI_Atomic_fetch_add( &(cev->met[rank].buf_readcount), -1);
+ tMPI_Atomic_add_return( &(cev->met[rank].buf_readcount), -1);
}
#endif
}
else
{
/* wait until everybody else is done copying the original buffer.
- We use fetch_add because we want to be sure of coherency.
+ We use atomic add-return because we want to be sure of coherency.
This wait is bound to be very short (otherwise it wouldn't
be double-buffering) so we always spin here. */
/*tMPI_Atomic_memory_barrier_rel();*/
-100000))
#endif
#if 0
- while (tMPI_Atomic_fetch_add( &(cev->met[myrank].buf_readcount), 0)
+ while (tMPI_Atomic_add_return( &(cev->met[myrank].buf_readcount), 0)
!= 0)
#endif
#if 1
tMPI_Atomic_memory_barrier_rel();
/* signal that we're done */
- tMPI_Atomic_fetch_add(&(cev->coll.current_sync), 1);
+ tMPI_Atomic_add_return(&(cev->coll.current_sync), 1);
/* we need to keep being in sync */
csync->syncs++;
}
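The buf_readcount handling above amounts to a reader reference count with release/acquire barriers around the buffer accesses. A stripped-down sketch of that pattern, with hypothetical helper names that are not thread_mpi API:

/* hypothetical helpers illustrating the readcount protocol */
static void reader_enter(tMPI_Atomic_t *readcount)
{
    tMPI_Atomic_add_return(readcount, 1);   /* announce ourselves as a reader */
    tMPI_Atomic_memory_barrier_acq();       /* before we start reading the buffer */
}

static void reader_leave(tMPI_Atomic_t *readcount)
{
    tMPI_Atomic_memory_barrier_rel();       /* all our buffer accesses are done */
    tMPI_Atomic_add_return(readcount, -1);  /* potentially releases the buffer */
}
/* the buffer's owner waits until the count is back at zero (as in the
   spin-wait variants above) before reusing the buffer */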
tMPI_Error(TMPI_COMM_WORLD, TMPI_ERR_INIT);
}
}
- /* the main thread now also runs start_fn if we don't want
+ /* the main thread also runs start_fn if we don't want
it to return */
if (!main_returns)
tMPI_Thread_starter((void*)&(threads[0]));
tMPI_Trace_print("tMPI_Init(%p, %p, %p)", argc, argv, start_function);
#endif
-
if (TMPI_COMM_WORLD==0) /* we're the main process */
{
int N=0;
tMPI_Get_N(argc, argv, "-nt", &N);
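        /* presumably main_returns must be TRUE here because tMPI_Init's
           caller has to get control back and continue as rank 0 itself,
           rather than having the main thread run start_function through
           tMPI_Thread_starter; rationale inferred from the !main_returns
           branch above */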
- tMPI_Start_threads(FALSE, N, TMPI_AFFINITY_ALL_CORES, argc, argv,
+ tMPI_Start_threads(TRUE, N, TMPI_AFFINITY_ALL_CORES, argc, argv,
NULL, NULL, start_function);
}
else