* \note You should NOT scale offsets before calling this routine; it is
* done internally by using the alignment template parameter instead.
*/
-template <int align>
-static inline void gmx_simdcall
-gatherLoadTranspose(const float * base,
- const std::int32_t offset[],
- SimdFloat * v0,
- SimdFloat * v1,
- SimdFloat * v2,
- SimdFloat * v3)
+template<int align>
+static inline void gmx_simdcall gatherLoadTranspose(const float* base,
+ const std::int32_t offset[],
+ SimdFloat* v0,
+ SimdFloat* v1,
+ SimdFloat* v2,
+ SimdFloat* v3)
{
// Offset list must be aligned for SIMD FINT32
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH * sizeof(std::int32_t)) == 0);
// Base pointer must be aligned to the smaller of 4 elements and float SIMD width
- assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4)*sizeof(float)) == 0);
+ assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4) * sizeof(float)) == 0);
// align parameter must also be a multiple of the above alignment requirement
assert(align % std::min(GMX_SIMD_FLOAT_WIDTH, 4) == 0);
* \note You should NOT scale offsets before calling this routine; it is
* done internally by using the alignment template parameter instead.
*/
-template <int align>
+template<int align>
static inline void gmx_simdcall
-gatherLoadTranspose(const float * base,
- const std::int32_t offset[],
- SimdFloat * v0,
- SimdFloat * v1)
+ gatherLoadTranspose(const float* base, const std::int32_t offset[], SimdFloat* v0, SimdFloat* v1)
{
// Offset list must be aligned for SIMD FINT32
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH * sizeof(std::int32_t)) == 0);
// Base pointer must be aligned to the smaller of 2 elements and float SIMD width
- assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2)*sizeof(float)) == 0);
+ assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2) * sizeof(float)) == 0);
// align parameter must also be a multiple of the above alignment requirement
assert(align % std::min(GMX_SIMD_FLOAT_WIDTH, 2) == 0);
* starting at the last offset. If you use the Gromacs aligned memory
* allocation routines this will always be the case.
*/
-template <int align>
-static inline void gmx_simdcall
-gatherLoadUTranspose(const float * base,
- const std::int32_t offset[],
- SimdFloat * v0,
- SimdFloat * v1,
- SimdFloat * v2)
+template<int align>
+static inline void gmx_simdcall gatherLoadUTranspose(const float* base,
+ const std::int32_t offset[],
+ SimdFloat* v0,
+ SimdFloat* v1,
+ SimdFloat* v2)
{
// Offset list must be aligned for SIMD FINT32
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH * sizeof(std::int32_t)) == 0);
for (std::size_t i = 0; i < v0->simdInternal_.size(); i++)
{
* load the data from memory. On the architectures we have tested this
* is faster even when a SIMD integer datatype is present.
*/
-template <int align>
+template<int align>
static inline void gmx_simdcall
-transposeScatterStoreU(float * base,
- const std::int32_t offset[],
- SimdFloat v0,
- SimdFloat v1,
- SimdFloat v2)
+ transposeScatterStoreU(float* base, const std::int32_t offset[], SimdFloat v0, SimdFloat v1, SimdFloat v2)
{
// Offset list must be aligned for SIMD FINT32
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH * sizeof(std::int32_t)) == 0);
for (std::size_t i = 0; i < v0.simdInternal_.size(); i++)
{
* starting at the last offset. If you use the Gromacs aligned memory
* allocation routines this will always be the case.
*/
-template <int align>
+template<int align>
static inline void gmx_simdcall
-transposeScatterIncrU(float * base,
- const std::int32_t offset[],
- SimdFloat v0,
- SimdFloat v1,
- SimdFloat v2)
+ transposeScatterIncrU(float* base, const std::int32_t offset[], SimdFloat v0, SimdFloat v1, SimdFloat v2)
{
// Offset list must be aligned for SIMD FINT32
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH * sizeof(std::int32_t)) == 0);
for (std::size_t i = 0; i < v0.simdInternal_.size(); i++)
{
- base[align * offset[i]] += v0.simdInternal_[i];
+ base[align * offset[i]] += v0.simdInternal_[i];
base[align * offset[i] + 1] += v1.simdInternal_[i];
base[align * offset[i] + 2] += v2.simdInternal_[i];
}
* starting at the last offset. If you use the Gromacs aligned memory
* allocation routines this will always be the case.
*/
-template <int align>
+template<int align>
static inline void gmx_simdcall
-transposeScatterDecrU(float * base,
- const std::int32_t offset[],
- SimdFloat v0,
- SimdFloat v1,
- SimdFloat v2)
+ transposeScatterDecrU(float* base, const std::int32_t offset[], SimdFloat v0, SimdFloat v1, SimdFloat v2)
{
// Offset list must be aligned for SIMD FINT32
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH * sizeof(std::int32_t)) == 0);
for (std::size_t i = 0; i < v0.simdInternal_.size(); i++)
{
- base[align * offset[i]] -= v0.simdInternal_[i];
+ base[align * offset[i]] -= v0.simdInternal_[i];
base[align * offset[i] + 1] -= v1.simdInternal_[i];
base[align * offset[i] + 2] -= v2.simdInternal_[i];
}
* first, second and third pair of SIMD variables, and store the three
* results back into a suitable vector-format array.
*/
-static inline void gmx_simdcall
-expandScalarsToTriplets(SimdFloat scalar,
- SimdFloat * triplets0,
- SimdFloat * triplets1,
- SimdFloat * triplets2)
+static inline void gmx_simdcall expandScalarsToTriplets(SimdFloat scalar,
+ SimdFloat* triplets0,
+ SimdFloat* triplets1,
+ SimdFloat* triplets2)
{
for (std::size_t i = 0; i < scalar.simdInternal_.size(); i++)
{
* a SIMD offset index, since the result of the real-to-integer conversion
* is present in a SIMD register just before calling this routine.
*/
-template <int align>
-static inline void gmx_simdcall
-gatherLoadBySimdIntTranspose(const float * base,
- SimdFInt32 offset,
- SimdFloat * v0,
- SimdFloat * v1,
- SimdFloat * v2,
- SimdFloat * v3)
+template<int align>
+static inline void gmx_simdcall gatherLoadBySimdIntTranspose(const float* base,
+ SimdFInt32 offset,
+ SimdFloat* v0,
+ SimdFloat* v1,
+ SimdFloat* v2,
+ SimdFloat* v3)
{
// Base pointer must be aligned to the smaller of 4 elements and float SIMD width
- assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4)*sizeof(float)) == 0);
+ assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4) * sizeof(float)) == 0);
// align parameter must also be a multiple of the above alignment requirement
assert(align % std::min(GMX_SIMD_FLOAT_WIDTH, 4) == 0);
* a SIMD offset index, since the result of the real-to-integer conversion
* is present in a SIMD register just before calling this routine.
*/
-template <int align>
+template<int align>
static inline void gmx_simdcall
-gatherLoadUBySimdIntTranspose(const float * base,
- SimdFInt32 offset,
- SimdFloat * v0,
- SimdFloat * v1)
+ gatherLoadUBySimdIntTranspose(const float* base, SimdFInt32 offset, SimdFloat* v0, SimdFloat* v1)
{
for (std::size_t i = 0; i < v0->simdInternal_.size(); i++)
{
* a SIMD offset index, since the result of the real-to-integer conversion
* is present in a SIMD register just before calling this routine.
*/
-template <int align>
+template<int align>
static inline void gmx_simdcall
-gatherLoadBySimdIntTranspose(const float * base,
- SimdFInt32 offset,
- SimdFloat * v0,
- SimdFloat * v1)
+ gatherLoadBySimdIntTranspose(const float* base, SimdFInt32 offset, SimdFloat* v0, SimdFloat* v1)
{
// Base pointer must be aligned to the smaller of 2 elements and float SIMD width
- assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2)*sizeof(float)) == 0);
+ assert(std::size_t(base) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2) * sizeof(float)) == 0);
// align parameter must also be a multiple of the above alignment requirement
assert(align % std::min(GMX_SIMD_FLOAT_WIDTH, 2) == 0);
* do not need these, this extra code will be optimized away completely if you
* just ignore the return value (Checked with gcc-4.9.1 and clang-3.6 for AVX).
*/
-static inline float gmx_simdcall
-reduceIncr4ReturnSum(float * m,
- SimdFloat v0,
- SimdFloat v1,
- SimdFloat v2,
- SimdFloat v3)
+static inline float gmx_simdcall reduceIncr4ReturnSum(float* m, SimdFloat v0, SimdFloat v1, SimdFloat v2, SimdFloat v3)
{
float sum[4]; // Note that the 4 here corresponds to the 4 m-elements, not any SIMD width
// Make sure the memory pointer is aligned to the smaller of 4 elements and float SIMD width
- assert(std::size_t(m) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4)*sizeof(float)) == 0);
+ assert(std::size_t(m) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4) * sizeof(float)) == 0);
sum[0] = reduce(v0);
sum[1] = reduce(v1);
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline SimdFloat gmx_simdcall
-loadDualHsimd(const float * m0,
- const float * m1)
+static inline SimdFloat gmx_simdcall loadDualHsimd(const float* m0, const float* m1)
{
// Builds one full-width value from two half-width reads: the first half of
// the elements comes from m0 and the second half from m1.
- SimdFloat a;
+ SimdFloat a;
// Make sure the memory pointers are aligned to half float SIMD width
- assert(std::size_t(m0) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
- assert(std::size_t(m1) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
+ assert(std::size_t(m0) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
+ assert(std::size_t(m1) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
// Only half the element count is iterated; each pass fills one element in
// the first half and the matching element in the second half.
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
- a.simdInternal_[i] = m0[i];
- a.simdInternal_[a.simdInternal_.size()/2 + i] = m1[i];
+ a.simdInternal_[i] = m0[i];
+ a.simdInternal_[a.simdInternal_.size() / 2 + i] = m1[i];
}
return a;
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline SimdFloat gmx_simdcall
-loadDuplicateHsimd(const float * m)
+static inline SimdFloat gmx_simdcall loadDuplicateHsimd(const float* m)
{
// Reads half a SIMD width of floats from m and stores the same values in
// both halves of the returned variable.
- SimdFloat a;
+ SimdFloat a;
// Make sure the memory pointer is aligned
- assert(std::size_t(m) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
+ assert(std::size_t(m) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
// The second-half element is copied from the first-half element just
// written, so memory is read only once per value.
- a.simdInternal_[i] = m[i];
- a.simdInternal_[a.simdInternal_.size()/2 + i] = a.simdInternal_[i];
+ a.simdInternal_[i] = m[i];
+ a.simdInternal_[a.simdInternal_.size() / 2 + i] = a.simdInternal_[i];
}
return a;
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline SimdFloat gmx_simdcall
-loadU1DualHsimd(const float * m)
+static inline SimdFloat gmx_simdcall loadU1DualHsimd(const float* m)
{
// Reads exactly two floats: m[0] is broadcast to every element of the
// first half and m[1] to every element of the second half. No alignment
// assertion is made on m in this routine.
- SimdFloat a;
+ SimdFloat a;
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
- a.simdInternal_[i] = m[0];
- a.simdInternal_[a.simdInternal_.size()/2 + i] = m[1];
+ a.simdInternal_[i] = m[0];
+ a.simdInternal_[a.simdInternal_.size() / 2 + i] = m[1];
}
return a;
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline void gmx_simdcall
-storeDualHsimd(float * m0,
- float * m1,
- SimdFloat a)
+static inline void gmx_simdcall storeDualHsimd(float* m0, float* m1, SimdFloat a)
{
// Splits a into two halves: the first half of the elements is written to
// m0 and the second half to m1 (overwriting, not accumulating).
// Make sure the memory pointers are aligned to half float SIMD width
- assert(std::size_t(m0) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
- assert(std::size_t(m1) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
+ assert(std::size_t(m0) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
+ assert(std::size_t(m1) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
m0[i] = a.simdInternal_[i];
- m1[i] = a.simdInternal_[a.simdInternal_.size()/2 + i];
+ m1[i] = a.simdInternal_[a.simdInternal_.size() / 2 + i];
}
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline void gmx_simdcall
-incrDualHsimd(float * m0,
- float * m1,
- SimdFloat a)
+static inline void gmx_simdcall incrDualHsimd(float* m0, float* m1, SimdFloat a)
{
// Accumulates a into memory: the first half of the elements is added to
// m0 and the second half is added to m1.
// Make sure the memory pointers are aligned to half float SIMD width
- assert(std::size_t(m0) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
- assert(std::size_t(m1) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
+ assert(std::size_t(m0) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
+ assert(std::size_t(m1) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
// First pass: all of m0 is updated from the first half of a.
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
m0[i] += a.simdInternal_[i];
}
// Second pass: m1 is updated from the second half of a, only after the m0
// pass has completed.
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
- m1[i] += a.simdInternal_[a.simdInternal_.size()/2 + i];
+ m1[i] += a.simdInternal_[a.simdInternal_.size() / 2 + i];
}
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline void gmx_simdcall
-decrHsimd(float * m,
- SimdFloat a)
+static inline void gmx_simdcall decrHsimd(float* m, SimdFloat a)
{
// Folds the two halves of a together and subtracts the result from half a
// SIMD width of floats at m.
// Make sure the memory pointer is aligned to half float SIMD width
- assert(std::size_t(m) % (GMX_SIMD_FLOAT_WIDTH/2*sizeof(float)) == 0);
+ assert(std::size_t(m) % (GMX_SIMD_FLOAT_WIDTH / 2 * sizeof(float)) == 0);
- for (std::size_t i = 0; i < a.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 2; i++)
{
// m[i] is reduced by the sum of element i and its counterpart in the
// second half.
- m[i] -= a.simdInternal_[i] + a.simdInternal_[a.simdInternal_.size()/2 + i];
+ m[i] -= a.simdInternal_[i] + a.simdInternal_[a.simdInternal_.size() / 2 + i];
}
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-template <int align>
-static inline void gmx_simdcall
-gatherLoadTransposeHsimd(const float * base0,
- const float * base1,
- const std::int32_t offset[],
- SimdFloat * v0,
- SimdFloat * v1)
+template<int align>
+static inline void gmx_simdcall gatherLoadTransposeHsimd(const float* base0,
+ const float* base1,
+ const std::int32_t offset[],
+ SimdFloat* v0,
+ SimdFloat* v1)
{
// For each of the first size()/2 offsets, a pair of consecutive floats is
// read at index align * offset[i] from both base0 and base1. The first
// float of each pair goes to v0 and the second to v1; base0 data fills the
// first half of the elements and base1 data fills the second half.
// Offset list must be aligned for half SIMD FINT32 width
- assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH/2*sizeof(std::int32_t)) == 0);
+ assert(std::size_t(offset) % (GMX_SIMD_FINT32_WIDTH / 2 * sizeof(std::int32_t)) == 0);
// base pointers must be aligned to the smaller of 2 elements and float SIMD width
- assert(std::size_t(base0) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2)*sizeof(float)) == 0);
- assert(std::size_t(base1) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2)*sizeof(float)) == 0);
+ assert(std::size_t(base0) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2) * sizeof(float)) == 0);
+ assert(std::size_t(base1) % (std::min(GMX_SIMD_FLOAT_WIDTH, 2) * sizeof(float)) == 0);
// alignment parameter must also be a multiple of the above required alignment
assert(align % std::min(GMX_SIMD_FLOAT_WIDTH, 2) == 0);
- for (std::size_t i = 0; i < v0->simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < v0->simdInternal_.size() / 2; i++)
{
// The same offset[i] is applied to both base pointers; offsets are
// scaled by the align template parameter here, not by the caller.
- v0->simdInternal_[i] = base0[align * offset[i]];
- v1->simdInternal_[i] = base0[align * offset[i] + 1];
- v0->simdInternal_[v0->simdInternal_.size()/2 + i] = base1[align * offset[i]];
- v1->simdInternal_[v1->simdInternal_.size()/2 + i] = base1[align * offset[i] + 1];
+ v0->simdInternal_[i] = base0[align * offset[i]];
+ v1->simdInternal_[i] = base0[align * offset[i] + 1];
+ v0->simdInternal_[v0->simdInternal_.size() / 2 + i] = base1[align * offset[i]];
+ v1->simdInternal_[v1->simdInternal_.size() / 2 + i] = base1[align * offset[i] + 1];
}
}
*
* Available if \ref GMX_SIMD_HAVE_HSIMD_UTIL_FLOAT is 1.
*/
-static inline float gmx_simdcall
-reduceIncr4ReturnSumHsimd(float * m,
- SimdFloat v0,
- SimdFloat v1)
+static inline float gmx_simdcall reduceIncr4ReturnSumHsimd(float* m, SimdFloat v0, SimdFloat v1)
{
// The 4 here corresponds to the 4 elements in memory, not any SIMD width
float sum[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
- for (std::size_t i = 0; i < v0.simdInternal_.size()/2; i++)
+ for (std::size_t i = 0; i < v0.simdInternal_.size() / 2; i++)
{
sum[0] += v0.simdInternal_[i];
- sum[1] += v0.simdInternal_[v0.simdInternal_.size()/2 + i];
+ sum[1] += v0.simdInternal_[v0.simdInternal_.size() / 2 + i];
sum[2] += v1.simdInternal_[i];
- sum[3] += v1.simdInternal_[v1.simdInternal_.size()/2 + i];
+ sum[3] += v1.simdInternal_[v1.simdInternal_.size() / 2 + i];
}
// Make sure the memory pointer is aligned to the smaller of 4 elements and float SIMD width
- assert(std::size_t(m) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4)*sizeof(float)) == 0);
+ assert(std::size_t(m) % (std::min(GMX_SIMD_FLOAT_WIDTH, 4) * sizeof(float)) == 0);
m[0] += sum[0];
m[1] += sum[1];
* contiguous and different values are 4 positions in SIMD
* apart.
*/
-static inline SimdFloat gmx_simdcall
-loadUNDuplicate4(const float* m)
+static inline SimdFloat gmx_simdcall loadUNDuplicate4(const float* m)
{
// Reads size()/4 floats from m and writes each one into four consecutive
// elements of the result: m[0] m[0] m[0] m[0] m[1] m[1] ... No alignment
// assertion is made on m in this routine.
- SimdFloat a;
- for (std::size_t i = 0; i < a.simdInternal_.size()/4; i++)
+ SimdFloat a;
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 4; i++)
{
- a.simdInternal_[i*4] = m[i];
- a.simdInternal_[i*4+1] = m[i];
- a.simdInternal_[i*4+2] = m[i];
- a.simdInternal_[i*4+3] = m[i];
+ a.simdInternal_[i * 4] = m[i];
+ a.simdInternal_[i * 4 + 1] = m[i];
+ a.simdInternal_[i * 4 + 2] = m[i];
+ a.simdInternal_[i * 4 + 3] = m[i];
}
return a;
}
* contiguous and same values are 4 positions in SIMD
* apart.
*/
-static inline SimdFloat gmx_simdcall
-load4DuplicateN(const float* m)
+static inline SimdFloat gmx_simdcall load4DuplicateN(const float* m)
{
// Reads exactly four floats, m[0] to m[3], and repeats that group of four
// across the result: m[0] m[1] m[2] m[3] m[0] m[1] ... No alignment
// assertion is made on m in this routine.
- SimdFloat a;
- for (std::size_t i = 0; i < a.simdInternal_.size()/4; i++)
+ SimdFloat a;
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 4; i++)
{
- a.simdInternal_[i*4] = m[0];
- a.simdInternal_[i*4+1] = m[1];
- a.simdInternal_[i*4+2] = m[2];
- a.simdInternal_[i*4+3] = m[3];
+ a.simdInternal_[i * 4] = m[0];
+ a.simdInternal_[i * 4 + 1] = m[1];
+ a.simdInternal_[i * 4 + 2] = m[2];
+ a.simdInternal_[i * 4 + 3] = m[3];
}
return a;
}
* Blocks of 4 floats are loaded from m+n*offset where n
* is the n-th block of 4 floats.
*/
-static inline SimdFloat gmx_simdcall
-loadU4NOffset(const float* m, int offset)
+static inline SimdFloat gmx_simdcall loadU4NOffset(const float* m, int offset)
{
// Fills the result in groups of four elements: group i is read from
// m[offset * i] to m[offset * i + 3], so offset is the stride, in floats,
// between the starts of consecutive groups. No alignment assertion is
// made on m in this routine.
// NOTE(review): offset is an int multiplied by the std::size_t loop index,
// so the product is computed in unsigned arithmetic; a negative offset
// would rely on wraparound. Presumably callers pass non-negative strides;
// confirm.
- SimdFloat a;
- for (std::size_t i = 0; i < a.simdInternal_.size()/4; i++)
+ SimdFloat a;
+ for (std::size_t i = 0; i < a.simdInternal_.size() / 4; i++)
{
- a.simdInternal_[i*4] = m[offset*i + 0];
- a.simdInternal_[i*4+1] = m[offset*i + 1];
- a.simdInternal_[i*4+2] = m[offset*i + 2];
- a.simdInternal_[i*4+3] = m[offset*i + 3];
+ a.simdInternal_[i * 4] = m[offset * i + 0];
+ a.simdInternal_[i * 4 + 1] = m[offset * i + 1];
+ a.simdInternal_[i * 4 + 2] = m[offset * i + 2];
+ a.simdInternal_[i * 4 + 3] = m[offset * i + 3];
}
return a;
}
/*! \} */
/*! \endcond */
-} // namespace gmx
+} // namespace gmx
#endif // GMX_SIMD_IMPL_REFERENCE_UTIL_FLOAT_H