Fix compilation issues for AVX-512
author Erik Lindahl <erik@kth.se>
Wed, 29 Nov 2017 21:58:38 +0000 (22:58 +0100)
committer Mark Abraham <mark.j.abraham@gmail.com>
Mon, 4 Dec 2017 07:21:27 +0000 (08:21 +0100)
- gcc-5.4.0 incorrectly requires the second argument of
  _mm512_i32gather_pd() to be a double pointer instead
  of a void pointer; explicitly casting the argument to
  const double * compiles in both cases.
- Work around the 256-bit double precision permute
  instruction (_mm256_permutex_pd) only being available
  with AVX512VL instructions by using the 512-bit
  _mm512_permutex_pd instead.

Fixes #2312.

Change-Id: I31420e71064b1c5c25c8af29a1d41c7f372375c1

src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_double.h
src/gromacs/simd/impl_x86_avx_512/impl_x86_avx_512_util_float.h

index d70854ab5bea20b225315feb4a9c0c998f287742..0a5ab3b06fe022226073559f39d92040c04b3cec 100644 (file)
@@ -290,10 +290,10 @@ reduceIncr4ReturnSum(double *    m,
     t4 = _mm256_add_pd(t4, t3);
     _mm256_store_pd(m, t4);
 
-    t3 = _mm256_add_pd(t3, _mm256_permutex_pd(t3, 0x4E));
-    t3 = _mm256_add_pd(t3, _mm256_permutex_pd(t3, 0xB1));
+    t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0x4E));
+    t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0xB1));
 
-    return _mm_cvtsd_f64(_mm256_castpd256_pd128(t3));
+    return _mm_cvtsd_f64(_mm512_castpd512_pd128(t0));
 }
 
 static inline SimdDouble gmx_simdcall
@@ -429,10 +429,10 @@ reduceIncr4ReturnSumHsimd(double *     m,
     t3 = _mm256_add_pd(t3, t2);
     _mm256_store_pd(m, t3);
 
-    t2 = _mm256_add_pd(t2, _mm256_permutex_pd(t2, 0x4E));
-    t2 = _mm256_add_pd(t2, _mm256_permutex_pd(t2, 0xB1));
+    t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0x4E));
+    t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0xB1));
 
-    return _mm_cvtsd_f64(_mm256_castpd256_pd128(t2));
+    return _mm_cvtsd_f64(_mm512_castpd512_pd128(t0));
 }
 
 static inline SimdDouble gmx_simdcall
index 851a915c536987198a7265142530974b6198c172..ccca5eb6ed71d925e7349b14989576d0d3b713f8 100644 (file)
@@ -501,7 +501,7 @@ loadU4NOffset(const float* f, int offset)
     const __m256i gdx = _mm256_add_epi32(_mm256_setr_epi32(0, 2, 0, 2, 0, 2, 0, 2),
                                          _mm256_mullo_epi32(idx, _mm256_set1_epi32(offset)));
     return {
-               _mm512_castpd_ps(_mm512_i32gather_pd(gdx, f, sizeof(float)))
+               _mm512_castpd_ps(_mm512_i32gather_pd(gdx, reinterpret_cast<const double*>(f), sizeof(float)))
     };
 }