endif()
endif()
+string(TOUPPER "${CMAKE_BUILD_TYPE}" _cmake_build_type)
+if (_cmake_build_type STREQUAL "TSAN" AND NOT (GMX_SIMD_ACTIVE STREQUAL "NONE" OR GMX_SIMD_ACTIVE STREQUAL "REFERENCE" OR GMX_SIMD_ACTIVE MATCHES "AVX_512" OR GMX_SIMD_ACTIVE STREQUAL AVX2_256))
+ message(WARNING "TSAN is only tested with SIMD None, Reference, AVX2_256, and AVX_512. It is known to detect (harmless) memory races with SSE and AVX.")
+endif()
+
endmacro()
get_cuda_compiler_info(CUDA_COMPILER_INFO CUDA_COMPILER_FLAGS)
endif()
+string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_UPPER)
configure_file(config.h.cmakein config.h)
configure_file(gmxpre-config.h.cmakein gmxpre-config.h)
configure_file(buildinfo.h.cmakein buildinfo.h ESCAPE_QUOTES)
/* Cluster size used by nonbonded OpenCL kernel. Should be 8 for NVIDIA/AMD and 4 for Intel */
#define GMX_OCL_NB_CLUSTER_SIZE @GMX_OCL_NB_CLUSTER_SIZE@
+/* Define constants for build types (starting at 1 to make sure undefined values don't match) */
+#define CMAKE_BUILD_TYPE_DEBUG 1
+#define CMAKE_BUILD_TYPE_RELEASE 2
+#define CMAKE_BUILD_TYPE_RELWITHDEBINFO 3
+#define CMAKE_BUILD_TYPE_MINSIZEREL 4
+#define CMAKE_BUILD_TYPE_REFERENCE 5
+#define CMAKE_BUILD_TYPE_RELWITHASSERT 6
+#define CMAKE_BUILD_TYPE_PROFILE 7
+#define CMAKE_BUILD_TYPE_TSAN 8
+#define CMAKE_BUILD_TYPE_ASAN 9
+#define CMAKE_BUILD_TYPE_MSAN 10
+#cmakedefine CMAKE_BUILD_TYPE CMAKE_BUILD_TYPE_@CMAKE_BUILD_TYPE_UPPER@
+
/* Define relative path to OpenCL kernels */
#define GMX_INSTALL_OCLDIR "@GMX_INSTALL_OCLDIR@"
}
#if GMX_SIMD_HAVE_REAL
+
+//! Helper function so that we can run TSAN with SIMD support (where implemented).
+template <int align>
+static inline void gmx_simdcall
+gatherLoadUTransposeTSANSafe(const real *base,
+ const std::int32_t *offset,
+ SimdReal *v0,
+ SimdReal *v1,
+ SimdReal *v2)
+{
+#if (CMAKE_BUILD_TYPE == CMAKE_BUILD_TYPE_TSAN) && GMX_SIMD_X86_AVX2_256
+ // This function is only implemented in this case
+ gatherLoadUTransposeSafe<align>(base, offset, v0, v1, v2);
+#else
+ gatherLoadUTranspose<align>(base, offset, v0, v1, v2);
+#endif
+}
+
/*! \brief Calculate the constraint distance vectors r to project on from x.
*
* Determine the right-hand side of the matrix equation using coordinates xp. */
offset1[i] = bla[bs*2 + i*2 + 1];
}
- gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
rx_S = x0_S - x1_S;
ry_S = y0_S - y1_S;
rz_S = z0_S - z1_S;
transposeScatterStoreU<3>(reinterpret_cast<real *>(r + bs), offset2, rx_S, ry_S, rz_S);
- gatherLoadUTranspose<3>(reinterpret_cast<const real *>(xp), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTranspose<3>(reinterpret_cast<const real *>(xp), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(xp), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(xp), offset1, &x1_S, &y1_S, &z1_S);
+
rxp_S = x0_S - x1_S;
ryp_S = y0_S - y1_S;
rzp_S = z0_S - z1_S;
offset1[i] = bla[bs*2 + i*2 + 1];
}
- gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
- gatherLoadUTranspose<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset0, &x0_S, &y0_S, &z0_S);
+ gatherLoadUTransposeTSANSafe<3>(reinterpret_cast<const real *>(x), offset1, &x1_S, &y1_S, &z1_S);
+
rx_S = x0_S - x1_S;
ry_S = y0_S - y1_S;
rz_S = z0_S - z1_S;
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_SIMD_IMPL_X86_AVX2_256_UTIL_DOUBLE_H
#define GMX_SIMD_IMPL_X86_AVX2_256_UTIL_DOUBLE_H
+#include "config.h"
+
#include <immintrin.h>
#include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_util_double.h"
+namespace gmx
+{
+
+// This version is marginally slower than the AVX 4-wide component load
+// version on Intel Skylake. On older Intel architectures this version
+// is significantly slower.
+template <int align>
+static inline void gmx_simdcall
+gatherLoadUTransposeSafe(const double * base,
+ const std::int32_t offset[],
+ SimdDouble * v0,
+ SimdDouble * v1,
+ SimdDouble * v2)
+{
+ assert(std::size_t(offset) % 16 == 0);
+
+ const SimdDInt32 alignSimd = SimdDInt32(align);
+
+ SimdDInt32 vindex = simdLoad(offset, SimdDInt32Tag());
+ vindex = vindex*alignSimd;
+
+ *v0 = _mm256_i32gather_pd(base + 0, vindex.simdInternal_, sizeof(double));
+ *v1 = _mm256_i32gather_pd(base + 1, vindex.simdInternal_, sizeof(double));
+ *v2 = _mm256_i32gather_pd(base + 2, vindex.simdInternal_, sizeof(double));
+}
+
+} //namespace gmx
+
#endif // GMX_SIMD_IMPL_X86_AVX2_256_UTIL_DOUBLE_H
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2014,2015, by the GROMACS development team, led by
+ * Copyright (c) 2014,2015,2018, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
#ifndef GMX_SIMD_IMPL_X86_AVX2_256_UTIL_FLOAT_H
#define GMX_SIMD_IMPL_X86_AVX2_256_UTIL_FLOAT_H
+#include "config.h"
+
#include <immintrin.h>
#include "gromacs/simd/impl_x86_avx_256/impl_x86_avx_256_util_float.h"
+namespace gmx
+{
+
+// This version is marginally slower than the AVX 4-wide component load
+// version on Intel Skylake. On older Intel architectures this version
+// is significantly slower.
+template <int align>
+static inline void gmx_simdcall
+gatherLoadUTransposeSafe(const float * base,
+ const std::int32_t offset[],
+ SimdFloat * v0,
+ SimdFloat * v1,
+ SimdFloat * v2)
+{
+ assert(std::size_t(offset) % 32 == 0);
+
+ const SimdFInt32 alignSimd = SimdFInt32(align);
+
+ SimdFInt32 vindex = simdLoad(offset, SimdFInt32Tag());
+ vindex = vindex*alignSimd;
+
+ *v0 = _mm256_i32gather_ps(base + 0, vindex.simdInternal_, sizeof(float));
+ *v1 = _mm256_i32gather_ps(base + 1, vindex.simdInternal_, sizeof(float));
+ *v2 = _mm256_i32gather_ps(base + 2, vindex.simdInternal_, sizeof(float));
+}
+
+} // namespace gmx
+
#endif // GMX_SIMD_IMPL_X86_AVX2_256_UTIL_FLOAT_H
static const int c_simdBestPairAlignmentDouble = 2;
+// With the implementation below, thread-sanitizer can detect false positives.
+// For loading a triplet, we load 4 floats and ignore the last. Another thread
+// might write to this element, but that will not affect the result.
+// On AVX2 we can use a gather intrinsic instead.
template <int align>
static inline void gmx_simdcall
gatherLoadUTranspose(const double * base,
static const int c_simdBestPairAlignmentFloat = 2;
+// With the implementation below, thread-sanitizer can detect false positives.
+// For loading a triplet, we load 4 floats and ignore the last. Another thread
+// might write to this element, but that will not affect the result.
+// On AVX2 we can use a gather intrinsic instead.
template <int align>
static inline void gmx_simdcall
gatherLoadUTranspose(const float * base,