2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2017,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "gromacs/simd/simd.h"
40 #include "gromacs/utility/alignedallocator.h"
41 #include "gromacs/utility/basedefinitions.h"
43 #include "testutils/testasserts.h"
55 /*! \addtogroup module_simd */
58 #if GMX_SIMD_HAVE_REAL
60 /*! \brief Test fixture for higher-level floating-point utility functions.
62 * Inherit from main SimdTest, add code to generate aligned memory and data.
64 class SimdFloatingpointUtilTest : public SimdTest
67 SimdFloatingpointUtilTest()
69 // Resize vectors to get the amount of memory we need
70 integerMemory_.resize(GMX_SIMD_REAL_WIDTH);
72 // The total memory we allocate corresponds to two work arrays
73 // and 4 values each of GMX_SIMD_REAL_WIDTH.
74 realMemory_.resize(2*s_workMemSize_+4*GMX_SIMD_REAL_WIDTH);
// Carve the two buffers into named regions: offset_ covers integerMemory_,
// val0_..val3_ are four consecutive GMX_SIMD_REAL_WIDTH-sized slices at the
// start of realMemory_, and mem0_/mem1_ are the two s_workMemSize_-sized
// work arrays that follow them.
76 offset_ = integerMemory_.data();
77 val0_ = realMemory_.data();
78 val1_ = val0_ + GMX_SIMD_REAL_WIDTH;
79 val2_ = val1_ + GMX_SIMD_REAL_WIDTH;
80 val3_ = val2_ + GMX_SIMD_REAL_WIDTH;
81 mem0_ = val3_ + GMX_SIMD_REAL_WIDTH;
82 mem1_ = mem0_ + s_workMemSize_;
84 // Set default values for offset and variables val0_ through val3_
85 // We cannot fill mem_ here since those values depend on the test.
86 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
88 // Use every third point to avoid a contiguous access pattern
90 // Multiplying numbers by 1+100*GMX_REAL_EPS ensures some low bits are
91 // set too, so the tests make sure we read all bits correctly.
// val1_..val3_ get the same base value as val0_ shifted by 0.1, 0.2 and 0.3,
// so the four arrays can be told apart in the checks.
92 val0_[i] = (i ) * (1.0 + 100*GMX_REAL_EPS);
93 val1_[i] = (i + 0.1) * (1.0 + 100*GMX_REAL_EPS);
94 val2_[i] = (i + 0.2) * (1.0 + 100*GMX_REAL_EPS);
95 val3_[i] = (i + 0.3) * (1.0 + 100*GMX_REAL_EPS);
100 //! \brief Size of memory work buffers
102 // To have a somewhat odd access pattern, we use every
103 // third entry, so the largest value of offset_[i] is 3*GMX_SIMD_REAL_WIDTH.
104 // Then we also allow alignments up to 16, which means the largest index in mem0_[]
105 // that we might access is 16*3*GMX_SIMD_REAL_WIDTH+3.
106 static const std::size_t s_workMemSize_ = 16*3*GMX_SIMD_REAL_WIDTH+4;
108 std::vector<int, AlignedAllocator<int> > integerMemory_; //!< Aligned integer memory
109 std::vector<real, AlignedAllocator<real> > realMemory_; //!< Aligned real memory
111 int * offset_; //!< Pointer to offset indices, aligned memory
112 real * val0_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
113 real * val1_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
114 real * val2_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
115 real * val3_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
117 real * mem0_; //!< Pointer to aligned memory, s_workMemSize_ real values
118 real * mem1_; //!< Pointer to aligned memory, s_workMemSize_ real values
// Checks gatherLoadTranspose with four outputs for alignments 4, 8 and 12:
// val0_..val3_ are written into mem0_ at align*offset_[j] + 0..3, gathered
// back, and compared against direct loads of val0_..val3_.
123 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose4)
125 SimdReal v0, v1, v2, v3;
126 SimdReal ref0, ref1, ref2, ref3;
127 const int nalign = 3;
128 int alignmentList[nalign] = { 4, 8, 12 };
131 for (i = 0; i < nalign; i++)
133 align = alignmentList[i];
// Place the four values of lane j next to each other at the lane's base index
134 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
136 mem0_[align * offset_[j] ] = val0_[j];
137 mem0_[align * offset_[j] + 1] = val1_[j];
138 mem0_[align * offset_[j] + 2] = val2_[j];
139 mem0_[align * offset_[j] + 3] = val3_[j];
// Reference vectors are plain loads of the source arrays
142 ref0 = load<SimdReal>(val0_);
143 ref1 = load<SimdReal>(val1_);
144 ref2 = load<SimdReal>(val2_);
145 ref3 = load<SimdReal>(val3_);
// The alignment is a template parameter, so each value needs its own call
149 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1, &v2, &v3);
153 gatherLoadTranspose<8>(mem0_, offset_, &v0, &v1, &v2, &v3);
155 else if (align == 12)
157 gatherLoadTranspose<12>(mem0_, offset_, &v0, &v1, &v2, &v3);
164 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
165 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
166 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
167 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
// Checks gatherLoadTranspose with two outputs for alignments 2, 4 and
// c_simdBestPairAlignment: val0_/val1_ are written pairwise into mem0_ at
// align*offset_[j], gathered back, and compared against direct loads.
171 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2)
175 const int nalign = 3;
176 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
// The best pair alignment is expected not to exceed the SIMD width
179 EXPECT_TRUE(c_simdBestPairAlignment <= GMX_SIMD_REAL_WIDTH);
181 for (i = 0; i < nalign; i++)
183 align = alignmentList[i];
184 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
186 mem0_[align * offset_[j] ] = val0_[j];
187 mem0_[align * offset_[j] + 1] = val1_[j];
// Reference vectors are plain loads of the source arrays
190 ref0 = load<SimdReal>(val0_);
191 ref1 = load<SimdReal>(val1_);
// The alignment is a template parameter, so each value needs its own call
195 gatherLoadTranspose<2>(mem0_, offset_, &v0, &v1);
199 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1);
201 else if (align == c_simdBestPairAlignment)
203 gatherLoadTranspose<c_simdBestPairAlignment>(mem0_, offset_, &v0, &v1);
210 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
211 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
// Checks gatherLoadUTranspose with three outputs for strides 3 and 4:
// val0_..val2_ are written into mem0_ at align*offset_[j] + 0..2, gathered
// back, and compared against direct loads of val0_..val2_.
215 TEST_F(SimdFloatingpointUtilTest, gatherLoadUTranspose3)
218 SimdReal ref0, ref1, ref2;
219 const int nalign = 2;
220 int alignmentList[nalign] = { 3, 4 };
223 for (i = 0; i < nalign; i++)
225 align = alignmentList[i];
226 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
228 mem0_[align * offset_[j] ] = val0_[j];
229 mem0_[align * offset_[j] + 1] = val1_[j];
230 mem0_[align * offset_[j] + 2] = val2_[j];
// Reference vectors are plain loads of the source arrays
233 ref0 = load<SimdReal>(val0_);
234 ref1 = load<SimdReal>(val1_);
235 ref2 = load<SimdReal>(val2_);
// The stride is a template parameter, so each value needs its own call
239 gatherLoadUTranspose<3>(mem0_, offset_, &v0, &v1, &v2);
243 gatherLoadUTranspose<4>(mem0_, offset_, &v0, &v1, &v2);
250 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
251 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
252 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
// Checks transposeScatterStoreU for strides 3 and 4: the expected result is
// built by hand in refmem, the SIMD routine writes into mem0_, and the two
// buffers are compared over all s_workMemSize_ entries, so untouched
// background values are checked too.
256 TEST_F(SimdFloatingpointUtilTest, transposeScatterStoreU3)
259 real refmem[s_workMemSize_];
260 const int nalign = 2;
261 int alignmentList[nalign] = { 3, 4 };
263 FloatingPointTolerance tolerance(defaultRealTolerance());
265 for (i = 0; i < nalign; i++)
267 align = alignmentList[i];
269 // Set test and reference memory to background value
270 for (std::size_t j = 0; j < s_workMemSize_; j++)
272 // Multiply by 1+100*eps to make sure low bits are also used
273 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
276 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
278 // set values in _reference_ memory (we will then test with mem0_, and compare)
279 refmem[align * offset_[j] ] = val0_[j];
280 refmem[align * offset_[j] + 1] = val1_[j];
281 refmem[align * offset_[j] + 2] = val2_[j];
284 v0 = load<SimdReal>(val0_);
285 v1 = load<SimdReal>(val1_);
286 v2 = load<SimdReal>(val2_);
// The stride is a template parameter, so each value needs its own call
290 transposeScatterStoreU<3>(mem0_, offset_, v0, v1, v2);
294 transposeScatterStoreU<4>(mem0_, offset_, v0, v1, v2);
// Compare the whole work buffer, not only the entries that were written
301 for (std::size_t j = 0; j < s_workMemSize_; j++)
303 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks transposeScatterIncrU for strides 3 and 4: the expected result is
// built by adding val0_..val2_ onto the background in refmem, the SIMD
// routine increments mem0_, and the two buffers are compared over all
// s_workMemSize_ entries.
308 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3)
311 real refmem[s_workMemSize_];
312 const int nalign = 2;
313 int alignmentList[nalign] = { 3, 4 };
315 FloatingPointTolerance tolerance(defaultRealTolerance());
317 for (i = 0; i < nalign; i++)
319 align = alignmentList[i];
321 // Set test and reference memory to background value
322 for (std::size_t j = 0; j < s_workMemSize_; j++)
324 // Multiply by 1+100*eps to make sure low bits are also used
325 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
328 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
330 // Add values to _reference_ memory (we will then test with mem0_, and compare)
331 refmem[align * offset_[j] ] += val0_[j];
332 refmem[align * offset_[j] + 1] += val1_[j];
333 refmem[align * offset_[j] + 2] += val2_[j];
336 v0 = load<SimdReal>(val0_);
337 v1 = load<SimdReal>(val1_);
338 v2 = load<SimdReal>(val2_);
// The stride is a template parameter, so each value needs its own call
342 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
346 transposeScatterIncrU<4>(mem0_, offset_, v0, v1, v2);
// Compare the whole work buffer, not only the entries that were incremented
353 for (std::size_t j = 0; j < s_workMemSize_; j++)
355 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks transposeScatterIncrU<3> when the offsets of different lanes
// overlap: the scalar reference in refmem accumulates the contribution of
// every lane, and mem0_ is expected to match it over all s_workMemSize_ entries.
360 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3Overlapping)
363 real refmem[s_workMemSize_];
364 FloatingPointTolerance tolerance(defaultRealTolerance());
366 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
367 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
372 // Set test and reference memory to background value
373 for (std::size_t j = 0; j < s_workMemSize_; j++)
375 // Multiply by 1+100*eps to make sure low bits are also used
376 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
379 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
381 // Add values to _reference_ memory (we will then test with mem0_, and compare)
382 refmem[3 * offset_[j] ] += val0_[j];
383 refmem[3 * offset_[j] + 1] += val1_[j];
384 refmem[3 * offset_[j] + 2] += val2_[j];
387 v0 = load<SimdReal>(val0_);
388 v1 = load<SimdReal>(val1_);
389 v2 = load<SimdReal>(val2_);
391 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
// Compare the whole work buffer against the scalar reference
393 for (std::size_t j = 0; j < s_workMemSize_; j++)
395 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks transposeScatterDecrU for strides 3 and 4: the expected result is
// built by subtracting val0_..val2_ from the background in refmem, the SIMD
// routine decrements mem0_, and the two buffers are compared over all
// s_workMemSize_ entries.
399 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3)
402 real refmem[s_workMemSize_];
403 const int nalign = 2;
404 int alignmentList[nalign] = { 3, 4 };
406 FloatingPointTolerance tolerance(defaultRealTolerance());
408 for (i = 0; i < nalign; i++)
410 align = alignmentList[i];
412 // Set test and reference memory to background value
413 for (std::size_t j = 0; j < s_workMemSize_; j++)
415 // Multiply by 1+100*eps to make sure low bits are also used
416 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
419 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
421 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
422 refmem[align * offset_[j] ] -= val0_[j];
423 refmem[align * offset_[j] + 1] -= val1_[j];
424 refmem[align * offset_[j] + 2] -= val2_[j];
427 v0 = load<SimdReal>(val0_);
428 v1 = load<SimdReal>(val1_);
429 v2 = load<SimdReal>(val2_);
// The stride is a template parameter, so each value needs its own call
433 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
437 transposeScatterDecrU<4>(mem0_, offset_, v0, v1, v2);
// Compare the whole work buffer, not only the entries that were decremented
444 for (std::size_t j = 0; j < s_workMemSize_; j++)
446 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Checks transposeScatterDecrU<3> when the offsets of different lanes
// overlap: the scalar reference in refmem subtracts the contribution of
// every lane, and mem0_ is expected to match it over all s_workMemSize_ entries.
451 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3Overlapping)
454 real refmem[s_workMemSize_];
455 FloatingPointTolerance tolerance(defaultRealTolerance());
457 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
458 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
463 // Set test and reference memory to background value
464 for (std::size_t j = 0; j < s_workMemSize_; j++)
466 // Multiply by 1+100*eps to make sure low bits are also used
467 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100*GMX_REAL_EPS);
// Compiler-specific work-around guarding the reference loop below
470 #ifdef __INTEL_COMPILER //Bug in (at least) 19u1 and 18u5 (03424712)
473 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
475 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
476 refmem[3 * offset_[j] ] -= val0_[j];
477 refmem[3 * offset_[j] + 1] -= val1_[j];
478 refmem[3 * offset_[j] + 2] -= val2_[j];
481 v0 = load<SimdReal>(val0_);
482 v1 = load<SimdReal>(val1_);
483 v2 = load<SimdReal>(val2_);
485 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
// Compare the whole work buffer against the scalar reference
487 for (std::size_t j = 0; j < s_workMemSize_; j++)
489 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
// Loads one SIMD vector from mem0_, expands it into v0..v2 with
// expandScalarsToTriplets, and then checks val0_, val1_ and val2_: position i
// is expected to hold i / 3, (i + GMX_SIMD_REAL_WIDTH) / 3 and
// (i + 2 * GMX_SIMD_REAL_WIDTH) / 3 respectively.
493 TEST_F(SimdFloatingpointUtilTest, expandScalarsToTriplets)
495 SimdReal vs, v0, v1, v2;
498 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
503 vs = load<SimdReal>(mem0_);
505 expandScalarsToTriplets(vs, &v0, &v1, &v2);
511 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
513 EXPECT_EQ(i / 3, val0_[i]);
514 EXPECT_EQ((i + GMX_SIMD_REAL_WIDTH) / 3, val1_[i]);
515 EXPECT_EQ((i + 2 * GMX_SIMD_REAL_WIDTH) / 3, val2_[i]);
// Checks gatherLoadBySimdIntTranspose with four outputs for alignments 4, 8
// and 12. The offsets are passed as a SimdInt32 loaded from offset_ rather
// than as a pointer; results are compared against direct loads of val0_..val3_.
520 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose4)
522 SimdReal v0, v1, v2, v3;
523 SimdReal ref0, ref1, ref2, ref3;
524 SimdInt32 simdoffset;
525 const int nalign = 3;
526 int alignmentList[nalign] = { 4, 8, 12 };
529 for (i = 0; i < nalign; i++)
531 align = alignmentList[i];
// Place the four values of lane j next to each other at the lane's base index
532 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
534 mem0_[align * offset_[j] ] = val0_[j];
535 mem0_[align * offset_[j] + 1] = val1_[j];
536 mem0_[align * offset_[j] + 2] = val2_[j];
537 mem0_[align * offset_[j] + 3] = val3_[j];
540 simdoffset = load<SimdInt32>(offset_);
541 ref0 = load<SimdReal>(val0_);
542 ref1 = load<SimdReal>(val1_);
543 ref2 = load<SimdReal>(val2_);
544 ref3 = load<SimdReal>(val3_);
// The alignment is a template parameter, so each value needs its own call
548 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
552 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
554 else if (align == 12)
556 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
563 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
564 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
565 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
566 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
// Checks gatherLoadBySimdIntTranspose with two outputs for alignments 4, 8
// and 12. The offsets are passed as a SimdInt32 loaded from offset_; results
// are compared against direct loads of val0_ and val1_.
571 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose2)
575 SimdInt32 simdoffset;
576 const int nalign = 3;
577 int alignmentList[nalign] = { 4, 8, 12 };
580 for (i = 0; i < nalign; i++)
582 align = alignmentList[i];
583 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
585 mem0_[align * offset_[j] ] = val0_[j];
586 mem0_[align * offset_[j] + 1] = val1_[j];
589 simdoffset = load<SimdInt32>(offset_);
590 ref0 = load<SimdReal>(val0_);
591 ref1 = load<SimdReal>(val1_);
// The alignment is a template parameter, so each value needs its own call
595 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1);
599 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1);
601 else if (align == 12)
603 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1);
610 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
611 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
615 #if GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
// Checks gatherLoadUBySimdIntTranspose with two outputs for strides 1, 3 and
// 5. The offsets are passed as a SimdInt32 loaded from offset_; results are
// compared against direct loads of val0_ and val1_.
616 TEST_F(SimdFloatingpointUtilTest, gatherLoadUBySimdIntTranspose2)
620 SimdInt32 simdoffset;
621 const int nalign = 3;
622 int alignmentList[nalign] = { 1, 3, 5 };
625 for (i = 0; i < nalign; i++)
627 align = alignmentList[i];
628 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
630 mem0_[align * offset_[j] ] = val0_[j];
631 mem0_[align * offset_[j] + 1] = val1_[j];
634 simdoffset = load<SimdInt32>(offset_);
635 ref0 = load<SimdReal>(val0_);
636 ref1 = load<SimdReal>(val1_);
// The stride is a template parameter, so each value needs its own call
640 gatherLoadUBySimdIntTranspose<1>(mem0_, simdoffset, &v0, &v1);
644 gatherLoadUBySimdIntTranspose<3>(mem0_, simdoffset, &v0, &v1);
648 gatherLoadUBySimdIntTranspose<5>(mem0_, simdoffset, &v0, &v1);
655 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
656 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
659 #endif // GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
// Checks reduceIncr4ReturnSum on vectors loaded from val0_..val3_: afterwards
// mem0_[0..3] is expected to equal c0..c3 plus sum0..sum3, and the returned
// value is expected to equal sum0 + sum1 + sum2 + sum3.
661 TEST_F(SimdFloatingpointUtilTest, reduceIncr4Sum)
664 SimdReal v0, v1, v2, v3;
665 real sum0, sum1, sum2, sum3, tstsum;
666 FloatingPointTolerance tolerance(defaultRealTolerance());
668 v0 = load<SimdReal>(val0_);
669 v1 = load<SimdReal>(val1_);
670 v2 = load<SimdReal>(val2_);
671 v3 = load<SimdReal>(val3_);
// Scalar reference sums, accumulated over all GMX_SIMD_REAL_WIDTH elements
673 sum0 = sum1 = sum2 = sum3 = 0;
674 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
682 // Just put some numbers in memory so we check the addition is correct
688 tstsum = reduceIncr4ReturnSum(mem0_, v0, v1, v2, v3);
690 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
691 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
692 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
693 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
695 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
698 #if GMX_SIMD_HAVE_HSIMD_UTIL_REAL
// Checks loadDualHsimd: given pointers to the lower and upper halves of
// val0_, the result is expected to equal a full-width load of val0_.
700 TEST_F(SimdFloatingpointUtilTest, loadDualHsimd)
704 // Point p to the upper half of val0_
705 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
707 v0 = load<SimdReal>(val0_);
708 v1 = loadDualHsimd(val0_, p);
710 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks loadDuplicateHsimd: once the upper half of val0_ has been made
// identical to the lower half, a full-width load of val0_ is expected to
// equal loadDuplicateHsimd(val0_).
713 TEST_F(SimdFloatingpointUtilTest, loadDuplicateHsimd)
717 // Point p to the upper half of val0_
718 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
719 // Copy data so upper half is identical to lower
720 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
725 v0 = load<SimdReal>(val0_);
726 v1 = loadDuplicateHsimd(val0_);
728 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks loadU1DualHsimd: the reference is a full-width load of val0_ after
// its lower half was set to data[0] and its upper half to data[1]; the result
// of loadU1DualHsimd(data) is expected to match it.
732 TEST_F(SimdFloatingpointUtilTest, loadU1DualHsimd)
736 real data[2] = { 1, 2 };
738 // Point p to the upper half of val0_
739 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
740 // Set all low elements to data[0], and high to data[1]
741 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
747 v0 = load<SimdReal>(val0_);
748 v1 = loadU1DualHsimd(data);
750 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks storeDualHsimd: a vector loaded from val2_ is stored through
// pointers to the lower and upper halves of val0_, after which val0_ is
// expected to equal val2_ element by element.
754 TEST_F(SimdFloatingpointUtilTest, storeDualHsimd)
759 // Point p to the upper half of val0_
760 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
762 v0 = load<SimdReal>(val2_);
763 storeDualHsimd(val0_, p, v0);
765 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
767 EXPECT_EQ(val2_[i], val0_[i]);
// Checks incrDualHsimd with distinct targets: a vector loaded from val2_ is
// added through pointers to the lower and upper halves of val0_, after which
// val0_[i] is expected to equal the original val0_[i] + val2_[i].
771 TEST_F(SimdFloatingpointUtilTest, incrDualHsimd)
773 real reference[GMX_SIMD_REAL_WIDTH];
776 // Create reference values
777 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
779 reference[i] = val0_[i] + val2_[i];
782 // Point p to the upper half of val0_
783 real * p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
785 v0 = load<SimdReal>(val2_);
786 incrDualHsimd(val0_, p, v0);
788 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
790 EXPECT_EQ(reference[i], val0_[i]);
// Checks incrDualHsimd when both target pointers are the same (val0_): the
// first GMX_SIMD_REAL_WIDTH/2 elements of val0_ are expected to receive the
// lower and the upper half of the vector loaded from val2_.
794 TEST_F(SimdFloatingpointUtilTest, incrDualHsimdOverlapping)
796 real reference[GMX_SIMD_REAL_WIDTH/2];
799 // Create reference values
800 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
802 reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH/2+i];
805 v0 = load<SimdReal>(val2_);
806 incrDualHsimd(val0_, val0_, v0);
808 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH/2; i++)
810 EXPECT_EQ(reference[i], val0_[i]);
// Checks decrHsimd: after the call with a vector loaded from val1_, each of
// the first GMX_SIMD_REAL_WIDTH/2 elements of val0_ is expected to be reduced
// by the sum of the matching lower-half and upper-half elements of val1_.
814 TEST_F(SimdFloatingpointUtilTest, decrHsimd)
817 real ref[GMX_SIMD_REAL_WIDTH / 2];
819 FloatingPointTolerance tolerance(defaultRealTolerance());
821 // Point p to the upper half of val1_
822 real * p = val1_ + GMX_SIMD_REAL_WIDTH / 2;
// Build the scalar reference before val0_ is modified by decrHsimd
823 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
825 ref[i] = val0_[i] - ( val1_[i] + p[i] );
828 v0 = load<SimdReal>(val1_);
829 decrHsimd(val0_, v0);
831 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
833 EXPECT_REAL_EQ_TOL(ref[i], val0_[i], tolerance);
// Checks gatherLoadTransposeHsimd for alignments 2, 4 and
// c_simdBestPairAlignment. The lower halves of val0_/val1_ are placed in
// mem0_ and the upper halves in mem1_, both indexed by the first
// GMX_SIMD_REAL_WIDTH/2 offsets; the gathered result is expected to equal
// full-width loads of val0_ and val1_.
838 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2Hsimd)
843 const int nalign = 3;
844 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
847 for (i = 0; i < nalign; i++)
849 align = alignmentList[i];
850 for (j = 0; j < GMX_SIMD_REAL_WIDTH / 2; j++)
852 // Use mem0_ as base for lower half
853 mem0_[align * offset_[j] ] = val0_[j];
854 mem0_[align * offset_[j] + 1] = val1_[j];
855 // Use mem1_ as base for upper half
856 mem1_[align * offset_[j] ] = val0_[GMX_SIMD_REAL_WIDTH / 2 + j];
857 mem1_[align * offset_[j] + 1] = val1_[GMX_SIMD_REAL_WIDTH / 2 + j];
// Reference vectors are plain loads of the source arrays
861 ref0 = load<SimdReal>(val0_);
862 ref1 = load<SimdReal>(val1_);
// The alignment is a template parameter, so each value needs its own call
866 gatherLoadTransposeHsimd<2>(mem0_, mem1_, offset_, &v0, &v1);
870 gatherLoadTransposeHsimd<4>(mem0_, mem1_, offset_, &v0, &v1);
872 else if (align == c_simdBestPairAlignment)
874 gatherLoadTransposeHsimd<c_simdBestPairAlignment>(mem0_, mem1_, offset_, &v0, &v1);
881 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
882 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
// Checks reduceIncr4ReturnSumHsimd on two vectors loaded from val0_ and
// val1_: afterwards mem0_[0..3] is expected to equal c0..c3 plus sum0..sum3,
// and the returned value is expected to equal sum0 + sum1 + sum2 + sum3.
887 TEST_F(SimdFloatingpointUtilTest, reduceIncr4SumHsimd)
891 real sum0, sum1, sum2, sum3, tstsum;
892 FloatingPointTolerance tolerance(defaultRealTolerance());
894 // Use the half-SIMD storage in memory val0_ and val1_.
895 v0 = load<SimdReal>(val0_);
896 v1 = load<SimdReal>(val1_);
898 sum0 = sum1 = sum2 = sum3 = 0;
899 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
// sum1 and sum3 accumulate the upper halves of val0_ and val1_
902 sum1 += val0_[GMX_SIMD_REAL_WIDTH / 2 + i];
904 sum3 += val1_[GMX_SIMD_REAL_WIDTH / 2 + i];
907 // Just put some numbers in memory so we check the addition is correct
913 tstsum = reduceIncr4ReturnSumHsimd(mem0_, v0, v1);
915 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
916 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
917 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
918 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
920 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
923 #endif // GMX_SIMD_HAVE_HSIMD_UTIL_REAL
925 // These tests currently do not work for GMX_SIMD_REAL_WIDTH < 4. This should be fixed by making GMX_EXPECT_SIMD_REAL_EQ work for both Simd and Simd4.
926 #if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL && GMX_SIMD_REAL_WIDTH >= 4
// Checks loadUNDuplicate4: data holds GMX_SIMD_REAL_WIDTH/4 values 1, 2, ...,
// and the reference in val0_ repeats each of them in four consecutive
// elements; loadUNDuplicate4(data) is expected to equal a load of val0_.
928 TEST_F(SimdFloatingpointUtilTest, loadUNDuplicate4)
932 real data[GMX_SIMD_REAL_WIDTH/4];
933 std::iota(data, data+GMX_SIMD_REAL_WIDTH/4, 1);
935 #if defined __ICC && __ICC == 1800 || defined __ICL && __ICL == 1800
936 #pragma novector /* Work-around for incorrect vectorization for AVX_512(_KNL) */
938 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
940 val0_[i*4] = val0_[i*4+1] = val0_[i*4+2] = val0_[i*4+3] = data[i];
943 v0 = load<Simd4NReal>(val0_);
944 v1 = loadUNDuplicate4(data);
946 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks load4DuplicateN: val0_ is filled with the pattern {1, 2, 3, 4}
// repeated in every group of four elements, so a full load of val0_ is the
// reference; load4DuplicateN is expected to produce the same vector.
949 TEST_F(SimdFloatingpointUtilTest, load4DuplicateN)
953 real data[4] = { 1, 2, 3, 4};
955 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
957 val0_[i*4] = data[0];
958 val0_[i*4+1] = data[1];
959 val0_[i*4+2] = data[2];
960 val0_[i*4+3] = data[3];
963 v0 = load<Simd4NReal>(val0_);
// NOTE(review): val0_ (whose first four entries equal data) is passed rather
// than data itself - presumably because the load needs aligned memory; confirm.
964 v1 = load4DuplicateN(val0_);
966 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
// Checks loadU4NOffset: data holds 1, 2, ..., dataLen, and the reference in
// val0_ takes, for each group i of four elements, the four consecutive values
// starting at data[offset*i]; loadU4NOffset(data, offset) is expected to
// equal a load of val0_.
969 TEST_F(SimdFloatingpointUtilTest, loadU4NOffset)
971 constexpr int offset = 6; //non power of 2
// Just enough elements for the last group: four values at offset*(N-1)
972 constexpr int dataLen = 4+offset*(GMX_SIMD_REAL_WIDTH/4-1);
974 std::iota(data, data+dataLen, 1);
976 for (int i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
978 val0_[i*4] = data[0+offset*i];
979 val0_[i*4+1] = data[1+offset*i];
980 val0_[i*4+2] = data[2+offset*i];
981 val0_[i*4+3] = data[3+offset*i];
984 const Simd4NReal v0 = load<Simd4NReal>(val0_);
985 const Simd4NReal v1 = loadU4NOffset(data, offset);
987 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
990 #endif // GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
992 #endif // GMX_SIMD_HAVE_REAL