t4 = _mm256_add_pd(t4, t3);
_mm256_store_pd(m, t4);
- t3 = _mm256_add_pd(t3, _mm256_permutex_pd(t3, 0x4E));
- t3 = _mm256_add_pd(t3, _mm256_permutex_pd(t3, 0xB1));
+ // Horizontal add within each 256-bit lane: 0x4E swaps a lane's two
+ // 128-bit halves, 0xB1 swaps neighbouring doubles, so element 0 ends
+ // up holding the low-lane total.
+ t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0x4E));
+ t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0xB1));
- return _mm_cvtsd_f64(_mm256_castpd256_pd128(t3));
+ return _mm_cvtsd_f64(_mm512_castpd512_pd128(t0)); // lowest element as scalar
}
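
The replaced tail is the standard AVX-512 in-lane horizontal sum. A minimal standalone sketch of the idiom, assuming only AVX-512F and <immintrin.h> (the helper name sumLowLane is hypothetical, not part of the patch):

#include <immintrin.h>

// Horizontal sum of the four doubles in the low 256-bit lane of a __m512d,
// using the same permute immediates as the patch (requires AVX-512F).
static inline double sumLowLane(__m512d x)
{
    x = _mm512_add_pd(x, _mm512_permutex_pd(x, 0x4E)); // swap 128-bit halves per lane
    x = _mm512_add_pd(x, _mm512_permutex_pd(x, 0xB1)); // swap neighbouring doubles
    return _mm_cvtsd_f64(_mm512_castpd512_pd128(x));   // element 0 = lane total
}
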
static inline double gmx_simdcall
t3 = _mm256_add_pd(t3, t2);
_mm256_store_pd(m, t3);
- t2 = _mm256_add_pd(t2, _mm256_permutex_pd(t2, 0x4E));
- t2 = _mm256_add_pd(t2, _mm256_permutex_pd(t2, 0xB1));
+ // Same in-lane reduction as above: swap 128-bit halves, then
+ // neighbouring doubles, leaving the total in element 0.
+ t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0x4E));
+ t0 = _mm512_add_pd(t0, _mm512_permutex_pd(t0, 0xB1));
- return _mm_cvtsd_f64(_mm256_castpd256_pd128(t2));
+ return _mm_cvtsd_f64(_mm512_castpd512_pd128(t0)); // lowest element as scalar
}
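
In scalar terms, the two permute/add rounds turn a low lane holding {a, b, c, d} into a lane whose every element is a + b + c + d; this sketch models the data flow only (laneSum4 is hypothetical):

// After the 0x4E round the lane holds {a+c, b+d, c+a, d+b}; the 0xB1 round
// then makes every element a+b+c+d. Element 0 is what _mm_cvtsd_f64 returns.
static inline double laneSum4(double a, double b, double c, double d)
{
    return (a + c) + (b + d);
}
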
static inline SimdFloat gmx_simdcall
// gdx: per-element offsets of the float pairs to gather, in float units.
const __m256i gdx = _mm256_add_epi32(_mm256_setr_epi32(0, 2, 0, 2, 0, 2, 0, 2),
                                     _mm256_mullo_epi32(idx, _mm256_set1_epi32(offset)));
return {
- _mm512_castpd_ps(_mm512_i32gather_pd(gdx, f, sizeof(float)))
+ // Some compilers declare the gather's base pointer as const double*, hence
+ // the cast; gdx counts in floats, so the scale stays sizeof(float).
+ _mm512_castpd_ps(_mm512_i32gather_pd(gdx, reinterpret_cast<const double*>(f), sizeof(float)))
};
}
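
For reference, a scalar model of what the corrected gather loads: each 64-bit element pulls two consecutive floats starting at float index gdx[i] (gatherFloatPairs is a hypothetical helper, not part of the library's API):

#include <cstring>

// Scalar equivalent of _mm512_i32gather_pd(gdx, (const double*)f, sizeof(float)):
// writes 8 pairs of consecutive floats, i.e. one 64-bit element per index.
static inline void gatherFloatPairs(const float* f, const int gdx[8], float out[16])
{
    for (int i = 0; i < 8; ++i)
    {
        std::memcpy(&out[2 * i], f + gdx[i], 2 * sizeof(float));
    }
}
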