2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "gromacs/simd/simd.h"
40 #include "gromacs/utility/alignedallocator.h"
41 #include "gromacs/utility/basedefinitions.h"
43 #include "testutils/testasserts.h"
55 /*! \addtogroup module_simd */
58 #if GMX_SIMD_HAVE_REAL
60 /*! \brief Test fixture for higher-level floating-point utility functions.
62 * Inherit from main SimdTest, add code to generate aligned memory and data.
64 class SimdFloatingpointUtilTest : public SimdTest
67 SimdFloatingpointUtilTest()
69 // Resize vectors to get the amount of memory we need
70 integerMemory_.resize(GMX_SIMD_REAL_WIDTH);
72 // The total memory we allocate corresponds to two work arrays
73 // and 4 values each of GMX_SIMD_REAL_WIDTH.
74 realMemory_.resize(2*s_workMemSize_+4*GMX_SIMD_REAL_WIDTH);
// Carve the two backing vectors into named regions. realMemory_ is laid out
// as [val0_ | val1_ | val2_ | val3_ | mem0_ | mem1_], where each valN_ spans
// GMX_SIMD_REAL_WIDTH reals and each memN_ spans s_workMemSize_ reals.
76 offset_ = integerMemory_.data();
77 val0_ = realMemory_.data();
78 val1_ = val0_ + GMX_SIMD_REAL_WIDTH;
79 val2_ = val1_ + GMX_SIMD_REAL_WIDTH;
80 val3_ = val2_ + GMX_SIMD_REAL_WIDTH;
81 mem0_ = val3_ + GMX_SIMD_REAL_WIDTH;
82 mem1_ = mem0_ + s_workMemSize_;
84 // Set default values for offset and variables val0_ through val3_
85 // We cannot fill mem_ here since those values depend on the test.
86 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
88 // Use every third point to avoid a contiguous access pattern
90 // Multiply numbers by 1+100*GMX_REAL_EPS ensures some low bits are
91 // set too, so the tests make sure we read all bits correctly.
// valN_[i] is (i + 0.N) scaled by that factor, so the four arrays hold
// distinct values in every lane.
92 val0_[i] = (i ) * (1.0 + 100*GMX_REAL_EPS);
93 val1_[i] = (i + 0.1) * (1.0 + 100*GMX_REAL_EPS);
94 val2_[i] = (i + 0.2) * (1.0 + 100*GMX_REAL_EPS);
95 val3_[i] = (i + 0.3) * (1.0 + 100*GMX_REAL_EPS);
100 //! \brief Size of memory work buffers
102 // To have a somewhat odd access pattern, we use every
103 // third entry, so the largest value of offset_[i] is 3*GMX_SIMD_REAL_WIDTH.
104 // Then we also allow alignments up to 16, which means the largest index in mem0_[]
105 // that we might access is 16*3*GMX_SIMD_REAL_WIDTH+3.
106 static const std::size_t s_workMemSize_ = 16*3*GMX_SIMD_REAL_WIDTH+4;
// Backing storage; the raw pointers below all point into these two vectors,
// so they stay valid only as long as the vectors are not resized again.
108 std::vector<int, AlignedAllocator<int> > integerMemory_; //!< Aligned integer memory
109 std::vector<real, AlignedAllocator<real> > realMemory_; //!< Aligned real memory
111 int * offset_; //!< Pointer to offset indices, aligned memory
112 real * val0_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
113 real * val1_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
114 real * val2_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
115 real * val3_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
117 real * mem0_; //!< Pointer to aligned memory, s_workMemSize real values
118 real * mem1_; //!< Pointer to aligned memory, s_workMemSize real values
// Round-trip check of gatherLoadTranspose() with four outputs: the reference
// data in val0_..val3_ is written to mem0_ as 4-tuples starting at index
// align*offset_[j], gathered back into v0..v3, and compared with plain loads
// of val0_..val3_. The check is repeated for every stride in alignmentList.
123 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose4)
125 SimdReal v0, v1, v2, v3;
126 SimdReal ref0, ref1, ref2, ref3;
127 const int nalign = 3;
128 int alignmentList[nalign] = { 4, 8, 12 };
131 for (i = 0; i < nalign; i++)
133 align = alignmentList[i];
// Write one (val0, val1, val2, val3) tuple per SIMD lane into mem0_
134 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
136 mem0_[align * offset_[j] ] = val0_[j];
137 mem0_[align * offset_[j] + 1] = val1_[j];
138 mem0_[align * offset_[j] + 2] = val2_[j];
139 mem0_[align * offset_[j] + 3] = val3_[j];
// Expected result: refN holds valN_ lane by lane
142 ref0 = load<SimdReal>(val0_);
143 ref1 = load<SimdReal>(val1_);
144 ref2 = load<SimdReal>(val2_);
145 ref3 = load<SimdReal>(val3_);
// The stride is a template argument, so each value of align needs its own
// instantiation of the function under test.
149 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1, &v2, &v3);
153 gatherLoadTranspose<8>(mem0_, offset_, &v0, &v1, &v2, &v3);
155 else if (align == 12)
157 gatherLoadTranspose<12>(mem0_, offset_, &v0, &v1, &v2, &v3);
164 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
165 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
166 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
167 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
// Round-trip check of gatherLoadTranspose() with two outputs: pairs
// (val0_[j], val1_[j]) are written to mem0_ at index align*offset_[j],
// gathered back into v0/v1, and compared with plain loads of val0_/val1_.
// Strides tested are 2, 4 and c_simdBestPairAlignment.
171 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2)
175 const int nalign = 3;
176 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
// Checks the constant itself: the pair alignment must not exceed the SIMD width
179 EXPECT_TRUE(c_simdBestPairAlignment <= GMX_SIMD_REAL_WIDTH);
181 for (i = 0; i < nalign; i++)
183 align = alignmentList[i];
// Write one (val0, val1) pair per SIMD lane into mem0_
184 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
186 mem0_[align * offset_[j] ] = val0_[j];
187 mem0_[align * offset_[j] + 1] = val1_[j];
// Expected result: refN holds valN_ lane by lane
190 ref0 = load<SimdReal>(val0_);
191 ref1 = load<SimdReal>(val1_);
// The stride is a template argument, so each value of align needs its own
// instantiation of the function under test.
195 gatherLoadTranspose<2>(mem0_, offset_, &v0, &v1);
199 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1);
201 else if (align == c_simdBestPairAlignment)
203 gatherLoadTranspose<c_simdBestPairAlignment>(mem0_, offset_, &v0, &v1);
210 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
211 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
// Round-trip check of gatherLoadUTranspose() with three outputs: triplets
// (val0_[j], val1_[j], val2_[j]) are written to mem0_ at index
// align*offset_[j], gathered back into v0..v2, and compared with plain loads
// of val0_..val2_. Strides tested are 3 and 4.
215 TEST_F(SimdFloatingpointUtilTest, gatherLoadUTranspose3)
218 SimdReal ref0, ref1, ref2;
219 const int nalign = 2;
220 int alignmentList[nalign] = { 3, 4 };
223 for (i = 0; i < nalign; i++)
225 align = alignmentList[i];
// Write one (val0, val1, val2) triplet per SIMD lane into mem0_
226 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
228 mem0_[align * offset_[j] ] = val0_[j];
229 mem0_[align * offset_[j] + 1] = val1_[j];
230 mem0_[align * offset_[j] + 2] = val2_[j];
// Expected result: refN holds valN_ lane by lane
233 ref0 = load<SimdReal>(val0_);
234 ref1 = load<SimdReal>(val1_);
235 ref2 = load<SimdReal>(val2_);
// One instantiation per tested stride; presumably selected by the value of align
239 gatherLoadUTranspose<3>(mem0_, offset_, &v0, &v1, &v2);
243 gatherLoadUTranspose<4>(mem0_, offset_, &v0, &v1, &v2);
250 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
251 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
252 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
// Checks transposeScatterStoreU() against a scalar reference: mem0_ and the
// local refmem start with the same background pattern, the expected triplets
// are written into refmem with scalar code, the SIMD routine writes into
// mem0_, and then the two buffers are compared over their full length.
// Strides tested are 3 and 4.
256 TEST_F(SimdFloatingpointUtilTest, transposeScatterStoreU3)
259 real refmem[s_workMemSize_];
260 const int nalign = 2;
261 int alignmentList[nalign] = { 3, 4 };
263 FloatingPointTolerance tolerance(defaultRealTolerance());
265 for (i = 0; i < nalign; i++)
267 align = alignmentList[i];
269 // Set test and reference memory to background value
270 for (std::size_t j = 0; j < s_workMemSize_; j++)
272 // Multiply by 1+100*eps to make sure low bits are also used
273 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
276 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
278 // set values in _reference_ memory (we will then test with mem0_, and compare)
279 refmem[align * offset_[j] ] = val0_[j];
280 refmem[align * offset_[j] + 1] = val1_[j];
281 refmem[align * offset_[j] + 2] = val2_[j];
284 v0 = load<SimdReal>(val0_);
285 v1 = load<SimdReal>(val1_);
286 v2 = load<SimdReal>(val2_);
// One instantiation per tested stride; presumably selected by the value of align
290 transposeScatterStoreU<3>(mem0_, offset_, v0, v1, v2);
294 transposeScatterStoreU<4>(mem0_, offset_, v0, v1, v2);
// Compare every element, so entries the store should not have touched are
// verified to still hold the background value.
301 for (std::size_t j = 0; j < s_workMemSize_; j++)
303 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks transposeScatterIncrU() against a scalar reference: mem0_ and the
// local refmem start with the same background pattern, the triplets are
// added to refmem with scalar code, the SIMD routine increments mem0_, and
// then the two buffers are compared over their full length.
// Strides tested are 3 and 4.
308 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3)
311 real refmem[s_workMemSize_];
312 const int nalign = 2;
313 int alignmentList[nalign] = { 3, 4 };
315 FloatingPointTolerance tolerance(defaultRealTolerance());
317 for (i = 0; i < nalign; i++)
319 align = alignmentList[i];
321 // Set test and reference memory to background value
322 for (std::size_t j = 0; j < s_workMemSize_; j++)
324 // Multiply by 1+100*eps to make sure low bits are also used
325 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
328 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
330 // Add values to _reference_ memory (we will then test with mem0_, and compare)
331 refmem[align * offset_[j] ] += val0_[j];
332 refmem[align * offset_[j] + 1] += val1_[j];
333 refmem[align * offset_[j] + 2] += val2_[j];
336 v0 = load<SimdReal>(val0_);
337 v1 = load<SimdReal>(val1_);
338 v2 = load<SimdReal>(val2_);
// One instantiation per tested stride; presumably selected by the value of align
342 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
346 transposeScatterIncrU<4>(mem0_, offset_, v0, v1, v2);
// Compare every element, so entries the increment should not have touched
// are verified to still hold the background value.
353 for (std::size_t j = 0; j < s_workMemSize_; j++)
355 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Variant of the transposeScatterIncrU<3> check in which offset_ is first
// modified so that the lanes target the same memory. The scalar reference
// accumulates lane by lane into refmem, so the SIMD routine must produce the
// same accumulated sums in mem0_.
360 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3Overlapping)
363 real refmem[s_workMemSize_];
364 FloatingPointTolerance tolerance(defaultRealTolerance());
366 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
367 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
372 // Set test and reference memory to background value
373 for (std::size_t j = 0; j < s_workMemSize_; j++)
375 // Multiply by 1+100*eps to make sure low bits are also used
376 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
379 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
381 // Add values to _reference_ memory (we will then test with mem0_, and compare)
382 refmem[3 * offset_[j] ] += val0_[j];
383 refmem[3 * offset_[j] + 1] += val1_[j];
384 refmem[3 * offset_[j] + 2] += val2_[j];
387 v0 = load<SimdReal>(val0_);
388 v1 = load<SimdReal>(val1_);
389 v2 = load<SimdReal>(val2_);
// Only the stride-3 instantiation is exercised here
391 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
// Full-buffer comparison against the scalar reference
393 for (std::size_t j = 0; j < s_workMemSize_; j++)
395 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks transposeScatterDecrU() against a scalar reference: mem0_ and the
// local refmem start with the same background pattern, the triplets are
// subtracted from refmem with scalar code, the SIMD routine decrements
// mem0_, and then the two buffers are compared over their full length.
// Strides tested are 3 and 4.
399 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3)
402 real refmem[s_workMemSize_];
403 const int nalign = 2;
404 int alignmentList[nalign] = { 3, 4 };
406 FloatingPointTolerance tolerance(defaultRealTolerance());
408 for (i = 0; i < nalign; i++)
410 align = alignmentList[i];
412 // Set test and reference memory to background value
413 for (std::size_t j = 0; j < s_workMemSize_; j++)
415 // Multiply by 1+100*eps to make sure low bits are also used
416 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
419 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
421 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
422 refmem[align * offset_[j] ] -= val0_[j];
423 refmem[align * offset_[j] + 1] -= val1_[j];
424 refmem[align * offset_[j] + 2] -= val2_[j];
427 v0 = load<SimdReal>(val0_);
428 v1 = load<SimdReal>(val1_);
429 v2 = load<SimdReal>(val2_);
// One instantiation per tested stride; presumably selected by the value of align
433 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
437 transposeScatterDecrU<4>(mem0_, offset_, v0, v1, v2);
// Compare every element, so entries the decrement should not have touched
// are verified to still hold the background value.
444 for (std::size_t j = 0; j < s_workMemSize_; j++)
446 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Variant of the transposeScatterDecrU<3> check in which offset_ is first
// modified so that the lanes target the same memory. The scalar reference
// subtracts lane by lane from refmem, so the SIMD routine must produce the
// same accumulated result in mem0_.
451 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3Overlapping)
454 real refmem[s_workMemSize_];
455 FloatingPointTolerance tolerance(defaultRealTolerance());
457 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
458 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
463 // Set test and reference memory to background value
464 for (std::size_t j = 0; j < s_workMemSize_; j++)
466 // Multiply by 1+100*eps to make sure low bits are also used
467 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
470 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
472 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
473 refmem[3 * offset_[j] ] -= val0_[j];
474 refmem[3 * offset_[j] + 1] -= val1_[j];
475 refmem[3 * offset_[j] + 2] -= val2_[j];
478 v0 = load<SimdReal>(val0_);
479 v1 = load<SimdReal>(val1_);
480 v2 = load<SimdReal>(val2_);
// Only the stride-3 instantiation is exercised here
482 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
// Full-buffer comparison against the scalar reference
484 for (std::size_t j = 0; j < s_workMemSize_; j++)
486 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks expandScalarsToTriplets(): a SIMD value loaded from mem0_ is
// expanded into v0, v1 and v2. The final loop expects element i of
// val0_/val1_/val2_ to equal (i + k*GMX_SIMD_REAL_WIDTH)/3 with integer
// division, for k = 0, 1, 2 respectively.
490 TEST_F(SimdFloatingpointUtilTest, expandScalarsToTriplets)
492 SimdReal vs, v0, v1, v2;
495 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
500 vs = load<SimdReal>(mem0_);
502 expandScalarsToTriplets(vs, &v0, &v1, &v2);
// Scalar-side verification of the expanded values
508 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
510 EXPECT_EQ(i / 3, val0_[i]);
511 EXPECT_EQ((i + GMX_SIMD_REAL_WIDTH) / 3, val1_[i]);
512 EXPECT_EQ((i + 2 * GMX_SIMD_REAL_WIDTH) / 3, val2_[i]);
// Round-trip check of gatherLoadBySimdIntTranspose() with four outputs. Same
// scheme as the pointer-offset gather tests, except that the offsets are
// passed to the routine as a SimdInt32 loaded from offset_.
// Strides tested are 4, 8 and 12.
517 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose4)
519 SimdReal v0, v1, v2, v3;
520 SimdReal ref0, ref1, ref2, ref3;
521 SimdInt32 simdoffset;
522 const int nalign = 3;
523 int alignmentList[nalign] = { 4, 8, 12 };
526 for (i = 0; i < nalign; i++)
528 align = alignmentList[i];
// Write one (val0, val1, val2, val3) tuple per SIMD lane into mem0_
529 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
531 mem0_[align * offset_[j] ] = val0_[j];
532 mem0_[align * offset_[j] + 1] = val1_[j];
533 mem0_[align * offset_[j] + 2] = val2_[j];
534 mem0_[align * offset_[j] + 3] = val3_[j];
// Offsets go into a SIMD integer register; references are plain loads
537 simdoffset = load<SimdInt32>(offset_);
538 ref0 = load<SimdReal>(val0_);
539 ref1 = load<SimdReal>(val1_);
540 ref2 = load<SimdReal>(val2_);
541 ref3 = load<SimdReal>(val3_);
// The stride is a template argument, so each value of align needs its own
// instantiation of the function under test.
545 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
549 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
551 else if (align == 12)
553 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
560 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
561 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
562 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
563 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
// Round-trip check of gatherLoadBySimdIntTranspose() with two outputs: pairs
// are written to mem0_ at index align*offset_[j], gathered back using offsets
// held in a SimdInt32, and compared with plain loads of val0_/val1_.
// Strides tested are 4, 8 and 12.
568 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose2)
572 SimdInt32 simdoffset;
573 const int nalign = 3;
574 int alignmentList[nalign] = { 4, 8, 12 };
577 for (i = 0; i < nalign; i++)
579 align = alignmentList[i];
// Write one (val0, val1) pair per SIMD lane into mem0_
580 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
582 mem0_[align * offset_[j] ] = val0_[j];
583 mem0_[align * offset_[j] + 1] = val1_[j];
// Offsets go into a SIMD integer register; references are plain loads
586 simdoffset = load<SimdInt32>(offset_);
587 ref0 = load<SimdReal>(val0_);
588 ref1 = load<SimdReal>(val1_);
// The stride is a template argument, so each value of align needs its own
// instantiation of the function under test.
592 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1);
596 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1);
598 else if (align == 12)
600 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1);
607 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
608 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
612 #if GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
// Round-trip check of gatherLoadUBySimdIntTranspose() with two outputs: pairs
// are written to mem0_ at index align*offset_[j], gathered back using offsets
// held in a SimdInt32, and compared with plain loads of val0_/val1_.
// Strides tested are the odd values 1, 3 and 5.
613 TEST_F(SimdFloatingpointUtilTest, gatherLoadUBySimdIntTranspose2)
617 SimdInt32 simdoffset;
618 const int nalign = 3;
619 int alignmentList[nalign] = { 1, 3, 5 };
622 for (i = 0; i < nalign; i++)
624 align = alignmentList[i];
// Write one (val0, val1) pair per SIMD lane into mem0_
625 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
627 mem0_[align * offset_[j] ] = val0_[j];
628 mem0_[align * offset_[j] + 1] = val1_[j];
// Offsets go into a SIMD integer register; references are plain loads
631 simdoffset = load<SimdInt32>(offset_);
632 ref0 = load<SimdReal>(val0_);
633 ref1 = load<SimdReal>(val1_);
// One instantiation per tested stride; presumably selected by the value of align
637 gatherLoadUBySimdIntTranspose<1>(mem0_, simdoffset, &v0, &v1);
641 gatherLoadUBySimdIntTranspose<3>(mem0_, simdoffset, &v0, &v1);
645 gatherLoadUBySimdIntTranspose<5>(mem0_, simdoffset, &v0, &v1);
652 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
653 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
656 #endif // GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
// Checks reduceIncr4ReturnSum(): after the call, mem0_[k] is expected to
// equal ck + sumk for k = 0..3, and the returned value is expected to equal
// sum0 + sum1 + sum2 + sum3, all within the default real tolerance.
658 TEST_F(SimdFloatingpointUtilTest, reduceIncr4Sum)
661 SimdReal v0, v1, v2, v3;
662 real sum0, sum1, sum2, sum3, tstsum;
663 FloatingPointTolerance tolerance(defaultRealTolerance());
// Inputs to the reduction are the four default value arrays
665 v0 = load<SimdReal>(val0_);
666 v1 = load<SimdReal>(val1_);
667 v2 = load<SimdReal>(val2_);
668 v3 = load<SimdReal>(val3_);
// Scalar reference sums start from zero
670 sum0 = sum1 = sum2 = sum3 = 0;
671 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
679 // Just put some numbers in memory so we check the addition is correct
685 tstsum = reduceIncr4ReturnSum(mem0_, v0, v1, v2, v3);
// mem0_[0..3] must have been incremented from their starting values c0..c3
687 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
688 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
689 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
690 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
// The return value is the total over all four registers
692 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
695 #if GMX_SIMD_HAVE_HSIMD_UTIL_REAL
// Checks loadDualHsimd(): loading from val0_ and from the midpoint of val0_
// must give the same register as a single full-width load of val0_.
697 TEST_F(SimdFloatingpointUtilTest, loadDualHsimd)
701 // Point p to the upper half of val0_
702 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
// v0 is the reference (full-width load); v1 is the result under test
704 v0 = load<SimdReal>(val0_);
705 v1 = loadDualHsimd(val0_, p);
707 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks loadDuplicateHsimd(): once val0_ has been prepared so that both of
// its halves hold the same data, the result of loadDuplicateHsimd(val0_)
// must equal a full-width load of val0_.
710 TEST_F(SimdFloatingpointUtilTest, loadDuplicateHsimd)
714 // Point p to the upper half of val0_
715 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
716 // Copy data so upper half is identical to lower
717 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
// v0 is the reference (full-width load); v1 is the result under test
722 v0 = load<SimdReal>(val0_);
723 v1 = loadDuplicateHsimd(val0_);
725 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks loadU1DualHsimd(): the register built from the two scalars in data
// must equal a full-width load of val0_, which is prepared to hold data[0]
// in its lower half and data[1] in its upper half.
729 TEST_F(SimdFloatingpointUtilTest, loadU1DualHsimd)
733 real data[2] = { 1, 2 };
735 // Point p to the upper half of val0_
736 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
737 // Set all low elements to data[0], and high to data[1]
738 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
// v0 is the reference (full-width load); v1 is the result under test
744 v0 = load<SimdReal>(val0_);
745 v1 = loadU1DualHsimd(data);
747 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks storeDualHsimd(): a register loaded from val2_ is stored to val0_
// and to the midpoint of val0_; afterwards val0_ must equal val2_ element by
// element.
751 TEST_F(SimdFloatingpointUtilTest, storeDualHsimd)
756 // Point p to the upper half of val0_
757 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
759 v0 = load<SimdReal>(val2_);
760 storeDualHsimd(val0_, p, v0);
// Exact comparison: a store is expected to copy values without rounding
762 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
764 EXPECT_EQ(val2_[i], val0_[i]);
// Checks incrDualHsimd() with non-overlapping targets: the two destinations
// are the lower and upper halves of val0_, and the increment is val2_.
// Afterwards val0_[i] must equal the original val0_[i] + val2_[i].
768 TEST_F(SimdFloatingpointUtilTest, incrDualHsimd)
770 real reference[GMX_SIMD_REAL_WIDTH];
773 // Create reference values
// Computed before the call, because the call overwrites val0_
774 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
776 reference[i] = val0_[i] + val2_[i];
779 // Point p to the upper half of val0_
780 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
782 v0 = load<SimdReal>(val2_);
783 incrDualHsimd(val0_, p, v0);
785 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
787 EXPECT_EQ(reference[i], val0_[i]);
// Checks incrDualHsimd() when both destination pointers are the same
// (val0_): each of the first GMX_SIMD_REAL_WIDTH/2 elements must then receive
// both the lower-half and the upper-half increment from val2_.
791 TEST_F(SimdFloatingpointUtilTest, incrDualHsimdOverlapping)
793 real reference[GMX_SIMD_REAL_WIDTH/2];
796 // Create reference values
// Computed before the call, because the call overwrites val0_
797 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
799 reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH/2+i];
802 v0 = load<SimdReal>(val2_);
// Same pointer passed twice on purpose to create the overlap
803 incrDualHsimd(val0_, val0_, v0);
805 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
807 EXPECT_EQ(reference[i], val0_[i]);
// Checks decrHsimd(): after the call, each of the first
// GMX_SIMD_REAL_WIDTH/2 elements of val0_ must equal its original value
// minus the sum of the corresponding lower-half and upper-half elements of
// val1_, within the default real tolerance.
811 TEST_F(SimdFloatingpointUtilTest, decrHsimd)
814 real ref[GMX_SIMD_REAL_WIDTH / 2];
816 FloatingPointTolerance tolerance(defaultRealTolerance());
818 // Point p to the upper half of val1_
819 real * p = val1_ + GMX_SIMD_REAL_WIDTH / 2;
// Reference is computed before the call, because the call overwrites val0_
820 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
822 ref[i] = val0_[i] - ( val1_[i] + p[i] );
825 v0 = load<SimdReal>(val1_);
826 decrHsimd(val0_, v0);
828 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
830 EXPECT_REAL_EQ_TOL(ref[i], val0_[i], tolerance);
// Round-trip check of gatherLoadTransposeHsimd() with two outputs and two
// base pointers: pairs for the lower half of the lanes are written relative
// to mem0_, pairs for the upper half relative to mem1_, both using the first
// GMX_SIMD_REAL_WIDTH/2 entries of offset_. The gathered v0/v1 must equal
// full-width loads of val0_/val1_.
// Strides tested are 2, 4 and c_simdBestPairAlignment.
835 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2Hsimd)
840 const int nalign = 3;
841 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
844 for (i = 0; i < nalign; i++)
846 align = alignmentList[i];
847 for (j = 0; j < GMX_SIMD_REAL_WIDTH / 2; j++)
849 // Use mem0_ as base for lower half
850 mem0_[align * offset_[j] ] = val0_[j];
851 mem0_[align * offset_[j] + 1] = val1_[j];
852 // Use mem1_ as base for upper half
853 mem1_[align * offset_[j] ] = val0_[GMX_SIMD_REAL_WIDTH / 2 + j];
854 mem1_[align * offset_[j] + 1] = val1_[GMX_SIMD_REAL_WIDTH / 2 + j];
// Expected result: refN holds valN_ lane by lane
858 ref0 = load<SimdReal>(val0_);
859 ref1 = load<SimdReal>(val1_);
// The stride is a template argument, so each value of align needs its own
// instantiation of the function under test.
863 gatherLoadTransposeHsimd<2>(mem0_, mem1_, offset_, &v0, &v1);
867 gatherLoadTransposeHsimd<4>(mem0_, mem1_, offset_, &v0, &v1);
869 else if (align == c_simdBestPairAlignment)
871 gatherLoadTransposeHsimd<c_simdBestPairAlignment>(mem0_, mem1_, offset_, &v0, &v1);
878 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
879 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
// Checks reduceIncr4ReturnSumHsimd(): after the call, mem0_[k] is expected
// to equal ck + sumk for k = 0..3, and the returned value is expected to
// equal sum0 + sum1 + sum2 + sum3, all within the default real tolerance.
// sum1 and sum3 are accumulated from the upper halves of val0_ and val1_.
884 TEST_F(SimdFloatingpointUtilTest, reduceIncr4SumHsimd)
888 real sum0, sum1, sum2, sum3, tstsum;
889 FloatingPointTolerance tolerance(defaultRealTolerance());
891 // Use the half-SIMD storage in memory val0_ and val1_.
892 v0 = load<SimdReal>(val0_);
893 v1 = load<SimdReal>(val1_);
// Scalar reference sums start from zero
895 sum0 = sum1 = sum2 = sum3 = 0;
896 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
899 sum1 += val0_[GMX_SIMD_REAL_WIDTH / 2 + i];
901 sum3 += val1_[GMX_SIMD_REAL_WIDTH / 2 + i];
904 // Just put some numbers in memory so we check the addition is correct
910 tstsum = reduceIncr4ReturnSumHsimd(mem0_, v0, v1);
// mem0_[0..3] must have been incremented from their starting values c0..c3
912 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
913 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
914 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
915 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
// The return value is the total over all four half-registers
917 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
920 #endif // GMX_SIMD_HAVE_HSIMD_UTIL_REAL
922 // These tests currently do not work for GMX_SIMD_REAL_WIDTH < 4. This should be fixed by having a GMX_EXPECT_SIMD_REAL_EQ that works for both Simd and Simd4.
923 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL && GMX_SIMD_REAL_WIDTH >= 4
// Checks loadUNDuplicate4(): data holds 1, 2, ..., GMX_SIMD_REAL_WIDTH/4.
// val0_ is filled so that each group of four consecutive elements repeats
// one entry of data, and a Simd4NReal load of val0_ is the reference that
// loadUNDuplicate4(data) must reproduce.
925 TEST_F(SimdFloatingpointUtilTest, loadUNDuplicate4)
929 real data[GMX_SIMD_REAL_WIDTH/4];
930 std::iota(data, data+GMX_SIMD_REAL_WIDTH/4, 1);
// The Intel compiler predefines __ICC (Linux/macOS) and __ICL (Windows).
// The guard must test __ICC; a single-underscore _ICC is never defined, which
// would leave the work-around disabled for ICC 18 outside Windows.
932 #if defined __ICC && __ICC == 1800 || defined __ICL && __ICL == 1800
933 #pragma novector /* Work-around for incorrect vectorization for AVX_512(_KNL) */
// Replicate each scalar of data four times in val0_
935 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
937 val0_[i*4] = val0_[i*4+1] = val0_[i*4+2] = val0_[i*4+3] = data[i];
// v0 is the reference (aligned load); v1 is the result under test
940 v0 = load<Simd4NReal>(val0_);
941 v1 = loadUNDuplicate4(data);
943 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks load4DuplicateN(): val0_ is filled with the four values of data
// repeated GMX_SIMD_REAL_WIDTH/4 times, and the result of
// load4DuplicateN(val0_) must equal a Simd4NReal load of val0_.
946 TEST_F(SimdFloatingpointUtilTest, load4DuplicateN)
950 real data[4] = { 1, 2, 3, 4};
// Repeat the 4-element pattern across the whole of val0_
952 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
954 val0_[i*4] = data[0];
955 val0_[i*4+1] = data[1];
956 val0_[i*4+2] = data[2];
957 val0_[i*4+3] = data[3];
// v0 is the reference (full load); v1 is the result under test
960 v0 = load<Simd4NReal>(val0_);
961 v1 = load4DuplicateN(val0_);
963 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks loadU4NOffset(): data is filled with 1, 2, ..., dataLen. The
// reference in val0_ takes, for each group i of four elements, the four
// consecutive entries of data starting at index offset*i; loadU4NOffset(data,
// offset) must reproduce a Simd4NReal load of that reference.
966 TEST_F(SimdFloatingpointUtilTest, loadU4NOffset)
968 constexpr int offset = 6; //non power of 2
// Last group starts at offset*(GMX_SIMD_REAL_WIDTH/4-1) and reads 4 entries
969 constexpr int dataLen = 4+offset*(GMX_SIMD_REAL_WIDTH/4-1);
971 std::iota(data, data+dataLen, 1);
// Build the reference with scalar code
973 for (int i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
975 val0_[i*4] = data[0+offset*i];
976 val0_[i*4+1] = data[1+offset*i];
977 val0_[i*4+2] = data[2+offset*i];
978 val0_[i*4+3] = data[3+offset*i];
// v0 is the reference (aligned load); v1 is the result under test
981 const Simd4NReal v0 = load<Simd4NReal>(val0_);
982 const Simd4NReal v1 = loadU4NOffset(data, offset);
984 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
987 #endif // GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
989 #endif // GMX_SIMD_HAVE_REAL