message(STATUS "OpenMP multithreading not supported with gcc/llvm-gcc 4.2 on Mac OS X, disabled")
set(GMX_OPENMP OFF CACHE BOOL
"OpenMP multithreading not supported with gcc/llvm-gcc 4.2 on Mac OS X, disabled!" FORCE)
+ elseif(CMAKE_C_COMPILER_ID MATCHES "Cray" AND CMAKE_VERSION VERSION_LESS 3)
+ message(STATUS "OpenMP multithreading is not detected correctly for the Cray compiler with CMake before version 3.0 (see http://public.kitware.com/Bug/view.php?id=14567)")
+ set(GMX_OPENMP OFF CACHE BOOL
+ "OpenMP multithreading is not detected correctly for the Cray compiler with CMake before version 3.0 (see http://public.kitware.com/Bug/view.php?id=14567)" FORCE)
else()
# We should do OpenMP detection if we get here
# OpenMP check must come before other CFLAGS!
"#include<xmmintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return 0;}"
SIMD_C_FLAGS
- "-msse2" "/arch:SSE2")
+ "-msse2" "/arch:SSE2" "-hgnu")
gmx_find_cxxflag_for_source(CXXFLAGS_SSE2 "C++ compiler SSE2 flag"
"#include<xmmintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_rsqrt_ps(x);return 0;}"
SIMD_CXX_FLAGS
- "-msse2" "/arch:SSE2")
+ "-msse2" "/arch:SSE2" "-hgnu")
if(NOT CFLAGS_SSE2 OR NOT CXXFLAGS_SSE2)
message(FATAL_ERROR "Cannot find SSE2 compiler flag. Use a newer compiler, or disable SIMD (slower).")
"#include<smmintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return 0;}"
SIMD_C_FLAGS
- "-msse4.1" "/arch:SSE4.1" "/arch:SSE2")
+ "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
gmx_find_cxxflag_for_source(CXXFLAGS_SSE4_1 "C++ compiler SSE4.1 flag"
"#include<smmintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_dp_ps(x,x,0x77);return 0;}"
SIMD_CXX_FLAGS
- "-msse4.1" "/arch:SSE4.1" "/arch:SSE2")
+ "-msse4.1" "/arch:SSE4.1" "/arch:SSE2" "-hgnu")
if(NOT CFLAGS_SSE4_1 OR NOT CXXFLAGS_SSE4_1)
message(FATAL_ERROR "Cannot find SSE4.1 compiler flag. "
"#include<immintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
SIMD_C_FLAGS
- "-mavx" "/arch:AVX")
+ "-mavx" "/arch:AVX" "-hgnu")
gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128 "C++ compiler AVX (128 bit) flag"
"#include<immintrin.h>
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_permute_ps(x,1);return 0;}"
SIMD_CXX_FLAGS
- "-mavx" "/arch:AVX")
+ "-mavx" "/arch:AVX" "-hgnu")
### STAGE 2: Find the fused-multiply add flag.
# GCC requires x86intrin.h for FMA support. MSVC 2010 requires intrin.h for FMA support.
${INCLUDE_INTRIN_H}
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return 0;}"
SIMD_C_FLAGS
- "-mfma4")
+ "-mfma4" "-hgnu")
gmx_find_cxxflag_for_source(CXXFLAGS_AVX_128_FMA "C++ compiler AVX (128 bit) FMA4 flag"
"#include<immintrin.h>
${INCLUDE_X86INTRIN_H}
${INCLUDE_INTRIN_H}
int main(){__m128 x=_mm_set1_ps(0.5);x=_mm_macc_ps(x,x,x);return 0;}"
SIMD_CXX_FLAGS
- "-mfma4")
+ "-mfma4" "-hgnu")
# We only need to check the last (FMA) test; that will always fail if the basic AVX128 test failed
if(NOT CFLAGS_AVX_128_FMA OR NOT CXXFLAGS_AVX_128_FMA)
"#include<immintrin.h>
int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return 0;}"
SIMD_C_FLAGS
- "-mavx" "/arch:AVX")
+ "-mavx" "/arch:AVX" "-hgnu")
gmx_find_cxxflag_for_source(CXXFLAGS_AVX "C++ compiler AVX (256 bit) flag"
"#include<immintrin.h>
int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_add_ps(x,x);return 0;}"
SIMD_CXX_FLAGS
- "-mavx" "/arch:AVX")
+ "-mavx" "/arch:AVX" "-hgnu")
if(NOT CFLAGS_AVX OR NOT CXXFLAGS_AVX)
message(FATAL_ERROR "Cannot find AVX compiler flag. Use a newer compiler, or choose SSE4.1 SIMD (slower).")
"#include<immintrin.h>
int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_fmadd_ps(x,x,x);return 0;}"
SIMD_C_FLAGS
- "-march=core-avx2" "-mavx2" "/arch:AVX") # no AVX2-specific flag for MSVC yet
+ "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
gmx_find_cxxflag_for_source(CXXFLAGS_AVX2 "C++ compiler AVX2 flag"
"#include<immintrin.h>
int main(){__m256 x=_mm256_set1_ps(0.5);x=_mm256_fmadd_ps(x,x,x);return 0;}"
SIMD_CXX_FLAGS
- "-march=core-avx2" "-mavx2" "/arch:AVX") # no AVX2-specific flag for MSVC yet
+ "-march=core-avx2" "-mavx2" "/arch:AVX" "-hgnu") # no AVX2-specific flag for MSVC yet
if(NOT CFLAGS_AVX2 OR NOT CXXFLAGS_AVX2)
message(FATAL_ERROR "Cannot find AVX2 compiler flag. Use a newer compiler, or choose AVX SIMD (slower).")
Some compatible compilers, like icc on linux+mac will take this path,
too */
#if ( (defined(__GNUC__) || defined(__PATHSCALE__) || defined(__PGI)) && \
- (!defined(__xlc__)) && (!defined(TMPI_TEST_NO_ATOMICS)) )
+ (!defined(__xlc__)) && (!defined(_CRAYC)) && (!defined(TMPI_TEST_NO_ATOMICS)) )
#ifdef __GNUC__
#define TMPI_GCC_VERSION (__GNUC__ * 10000 \
/* Fujitsu FX10 SPARC compiler requires gcc compatibility with -Xg */
#error Atomics support for Fujitsu FX10 compiler requires -Xg (gcc compatibility)
+#elif defined(_CRAYC)
+/* Cray compiler */
+#include "atomic/cce.h"
#else
#ifndef DOXYGEN
--- /dev/null
+/*
+ This source code file is part of thread_mpi.
+ Original for gcc written by Sander Pronk, Erik Lindahl, and possibly
+ others. Modified for the Cray compiler by Daniel Landau.
+
+ Copyright (c) 2009, Sander Pronk, Erik Lindahl.
+ Copyright 2014, Cray Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ 1) Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2) Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3) Neither the name of the copyright holders nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ If you want to redistribute modifications, please consider that
+ scientific software is very special. Version control is crucial -
+ bugs must be traceable. We will be happy to consider code for
+ inclusion in the official distribution, but derived work should not
+ be called official thread_mpi. Details are found in the README & COPYING
+ files.
+ */
+
+#include <intrinsics.h>
+
+#define tMPI_Atomic_memory_barrier() __builtin_ia32_mfence()
+
+
+typedef struct tMPI_Atomic
+{
+ volatile long value;
+}
+tMPI_Atomic_t;
+
+typedef struct tMPI_Atomic_ptr
+{
+ volatile void* value;
+}
+tMPI_Atomic_ptr_t;
+
+
+/* these are guaranteed to be atomic on x86 and x86_64 */
+#define tMPI_Atomic_get(a) ((int)( (a)->value) )
+#define tMPI_Atomic_set(a, i) (((a)->value) = (i))
+
+
+#define tMPI_Atomic_ptr_get(a) ((void*)((a)->value) )
+#define tMPI_Atomic_ptr_set(a, i) (((a)->value) = (void*)(i))
+
+
+#include "cce_intrinsics.h"
+
+#include "cce_spinlock.h"
--- /dev/null
+/*
+ This source code file is part of thread_mpi.
+ Original for gcc written by Sander Pronk, Erik Lindahl, and possibly
+ others. Modified for the Cray compiler by Daniel Landau.
+
+
+ Copyright (c) 2009, Sander Pronk, Erik Lindahl.
+ Copyright 2014, Cray Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ 1) Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2) Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3) Neither the name of the copyright holders nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ If you want to redistribute modifications, please consider that
+ scientific software is very special. Version control is crucial -
+ bugs must be traceable. We will be happy to consider code for
+ inclusion in the official distribution, but derived work should not
+ be called official thread_mpi. Details are found in the README & COPYING
+ files.
+ */
+
+#include <intrinsics.h>
+
+#define tMPI_Atomic_memory_barrier() __builtin_ia32_mfence()
+
+TMPI_EXPORT
+static inline int tMPI_Atomic_cas(tMPI_Atomic_t *a, int oldval, int newval)
+{
+ return __sync_val_compare_and_swap(&(a->value), oldval, newval) == oldval;
+}
+
+TMPI_EXPORT
+static inline int tMPI_Atomic_ptr_cas(tMPI_Atomic_ptr_t* a, void *oldval,
+ void *newval)
+{
+ return __sync_val_compare_and_swap((size_t*)&(a->value), (size_t)oldval, (size_t)newval) == (size_t)oldval;
+}
+
+TMPI_EXPORT
+static inline int tMPI_Atomic_add_return(tMPI_Atomic_t *a, volatile int i)
+{
+ return __sync_add_and_fetch( &(a->value), i);
+}
+#define TMPI_ATOMIC_HAVE_NATIVE_ADD_RETURN
+
+
+TMPI_EXPORT
+static inline int tMPI_Atomic_fetch_add(tMPI_Atomic_t *a, volatile int i)
+{
+ return __sync_fetch_and_add( &(a->value), i);
+}
+#define TMPI_ATOMIC_HAVE_NATIVE_FETCH_ADD
--- /dev/null
+/*
+ This source code file is part of thread_mpi.
+ Original for gcc written by Sander Pronk, Erik Lindahl, and possibly
+ others. Modified for the Cray compiler by Daniel Landau.
+
+ Copyright (c) 2009, Sander Pronk, Erik Lindahl.
+ Copyright 2014, Cray Inc.
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ 1) Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ 2) Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ 3) Neither the name of the copyright holders nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY US ''AS IS'' AND ANY
+ EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL WE BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ If you want to redistribute modifications, please consider that
+ scientific software is very special. Version control is crucial -
+ bugs must be traceable. We will be happy to consider code for
+ inclusion in the official distribution, but derived work should not
+ be called official thread_mpi. Details are found in the README & COPYING
+ files.
+ */
+
+#include <intrinsics.h>
+
+typedef struct tMPI_Spinlock
+{
+ volatile long lock /*__attribute__ ((aligned(64)))*/;
+} tMPI_Spinlock_t;
+
+#define TMPI_SPINLOCK_INITIALIZER { 0 }
+
+#define TMPI_ATOMIC_HAVE_NATIVE_SPINLOCK
+
+
+
+static inline void tMPI_Spinlock_init(tMPI_Spinlock_t *x)
+{
+ x->lock = 0;
+}
+
+
+static inline void tMPI_Spinlock_lock(tMPI_Spinlock_t *x)
+{
+ while (__sync_lock_test_and_set(&(x->lock), 1) == 1)
+ {
+ /* this is nicer on the system bus: */
+ while (x->lock == 1)
+ {
+ }
+ }
+}
+
+
+static inline int tMPI_Spinlock_trylock(tMPI_Spinlock_t *x)
+{
+ return __sync_lock_test_and_set(&(x->lock), 1);
+}
+
+
+static inline void tMPI_Spinlock_unlock(tMPI_Spinlock_t *x)
+{
+ x->lock = 0;
+}
+
+static inline int tMPI_Spinlock_islocked(const tMPI_Spinlock_t *x)
+{
+ return ( x->lock == 1 );
+}
+
+static inline void tMPI_Spinlock_wait(tMPI_Spinlock_t *x)
+{
+ do
+ {
+ }
+ while (x->lock == 1);
+}
* one when later linking to the library it might happen that the
* library supports cyclecounters but not the headers, or vice versa.
*/
-#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
+#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
(defined(__i386__) || defined(__x86_64__)))
static __inline__ int gmx_cycles_have_counter(void)
{
* routine.
*/
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
- (defined(__i386__) || defined(__x86_64__)))
+ (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
static __inline__ gmx_cycles_t gmx_cycles_read(void)
{
/* x86 with GCC inline assembly - pentium TSC register */
return ret;
}
+#elif defined(_CRAYC)
+#include <intrinsics.h>
+
+static __inline gmx_cycles_t gmx_cycles_read(void)
+{
+ return _rtc();
+}
#else
static gmx_cycles_t gmx_cycles_read(void)
{