2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2017,2018,2019,2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "gromacs/simd/simd.h"
40 #include "gromacs/utility/alignedallocator.h"
41 #include "gromacs/utility/basedefinitions.h"
43 #include "testutils/testasserts.h"
55 /*! \addtogroup module_simd */
58 #if GMX_SIMD_HAVE_REAL
60 /*! \brief Test fixture for higher-level floating-point utility functions.
62 * Inherit from main SimdTest, add code to generate aligned memory and data.
64 class SimdFloatingpointUtilTest : public SimdTest
67 SimdFloatingpointUtilTest()
69 // Resize vectors to get the amount of memory we need
70 integerMemory_.resize(GMX_SIMD_REAL_WIDTH);
72 // The total memory we allocate corresponds to two work arrays
73 // and 4 values each of GMX_SIMD_REAL_WIDTH.
74 realMemory_.resize(2 * s_workMemSize_ + 4 * GMX_SIMD_REAL_WIDTH);
76 offset_ = integerMemory_.data();
77 val0_ = realMemory_.data();
78 val1_ = val0_ + GMX_SIMD_REAL_WIDTH;
79 val2_ = val1_ + GMX_SIMD_REAL_WIDTH;
80 val3_ = val2_ + GMX_SIMD_REAL_WIDTH;
81 mem0_ = val3_ + GMX_SIMD_REAL_WIDTH;
82 mem1_ = mem0_ + s_workMemSize_;
84 // Set default values for offset and variables val0_ through val3_
85 // We cannot fill mem_ here since those values depend on the test.
86 for (int i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
88 // Use every third point to avoid a continguous access pattern
90 // Multiply numbers by 1+100*GMX_REAL_EPS ensures some low bits are
91 // set too, so the tests make sure we read all bits correctly.
92 val0_[i] = (i) * (1.0 + 100 * GMX_REAL_EPS);
93 val1_[i] = (i + 0.1) * (1.0 + 100 * GMX_REAL_EPS);
94 val2_[i] = (i + 0.2) * (1.0 + 100 * GMX_REAL_EPS);
95 val3_[i] = (i + 0.3) * (1.0 + 100 * GMX_REAL_EPS);
100 //! \brief Size of memory work buffers
102 // To have a somewhat odd access pattern, we use every
103 // third entry, so the largest value of offset_[i] is 3*GMX_SIMD_REAL_WIDTH.
104 // Then we also allow alignments up to 16, which means the largest index in mem0_[]
105 // that we might access is 16*3*GMX_SIMD_REAL_WIDTH+3.
106 static const std::size_t s_workMemSize_ = 16 * 3 * GMX_SIMD_REAL_WIDTH + 4;
108 std::vector<int, AlignedAllocator<int>> integerMemory_; //!< Aligned integer memory
109 std::vector<real, AlignedAllocator<real>> realMemory_; //!< Aligned real memory
111 int* offset_; //!< Pointer to offset indices, aligned memory
112 real* val0_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
113 real* val1_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
114 real* val2_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
115 real* val3_; //!< Pointer to GMX_SIMD_REAL_WIDTH values, aligned
117 real* mem0_; //!< Pointer to aligned memory, s_workMemSize real values
118 real* mem1_; //!< Pointer to aligned memory, s_workMemSize real values
122 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose4)
124 SimdReal v0, v1, v2, v3;
125 SimdReal ref0, ref1, ref2, ref3;
126 const int nalign = 3;
127 int alignmentList[nalign] = { 4, 8, 12 };
130 for (i = 0; i < nalign; i++)
132 align = alignmentList[i];
133 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
135 mem0_[align * offset_[j]] = val0_[j];
136 mem0_[align * offset_[j] + 1] = val1_[j];
137 mem0_[align * offset_[j] + 2] = val2_[j];
138 mem0_[align * offset_[j] + 3] = val3_[j];
141 ref0 = load<SimdReal>(val0_);
142 ref1 = load<SimdReal>(val1_);
143 ref2 = load<SimdReal>(val2_);
144 ref3 = load<SimdReal>(val3_);
148 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1, &v2, &v3);
152 gatherLoadTranspose<8>(mem0_, offset_, &v0, &v1, &v2, &v3);
154 else if (align == 12)
156 gatherLoadTranspose<12>(mem0_, offset_, &v0, &v1, &v2, &v3);
163 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
164 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
165 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
166 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
170 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2)
174 const int nalign = 3;
175 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
178 EXPECT_TRUE(c_simdBestPairAlignment <= GMX_SIMD_REAL_WIDTH);
180 for (i = 0; i < nalign; i++)
182 align = alignmentList[i];
183 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
185 mem0_[align * offset_[j]] = val0_[j];
186 mem0_[align * offset_[j] + 1] = val1_[j];
189 ref0 = load<SimdReal>(val0_);
190 ref1 = load<SimdReal>(val1_);
194 gatherLoadTranspose<2>(mem0_, offset_, &v0, &v1);
198 gatherLoadTranspose<4>(mem0_, offset_, &v0, &v1);
200 else if (align == c_simdBestPairAlignment)
202 gatherLoadTranspose<c_simdBestPairAlignment>(mem0_, offset_, &v0, &v1);
209 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
210 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
214 TEST_F(SimdFloatingpointUtilTest, gatherLoadUTranspose3)
217 SimdReal ref0, ref1, ref2;
218 const int nalign = 2;
219 int alignmentList[nalign] = { 3, 4 };
222 for (i = 0; i < nalign; i++)
224 align = alignmentList[i];
225 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
227 mem0_[align * offset_[j]] = val0_[j];
228 mem0_[align * offset_[j] + 1] = val1_[j];
229 mem0_[align * offset_[j] + 2] = val2_[j];
232 ref0 = load<SimdReal>(val0_);
233 ref1 = load<SimdReal>(val1_);
234 ref2 = load<SimdReal>(val2_);
238 gatherLoadUTranspose<3>(mem0_, offset_, &v0, &v1, &v2);
242 gatherLoadUTranspose<4>(mem0_, offset_, &v0, &v1, &v2);
249 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
250 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
251 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
255 TEST_F(SimdFloatingpointUtilTest, transposeScatterStoreU3)
258 real refmem[s_workMemSize_];
259 const int nalign = 2;
260 int alignmentList[nalign] = { 3, 4 };
262 FloatingPointTolerance tolerance(defaultRealTolerance());
264 for (i = 0; i < nalign; i++)
266 align = alignmentList[i];
268 // Set test and reference memory to background value
269 for (std::size_t j = 0; j < s_workMemSize_; j++)
271 // Multiply by 1+100*eps to make sure low bits are also used
272 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100 * GMX_REAL_EPS);
275 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
277 // set values in _reference_ memory (we will then test with mem0_, and compare)
278 refmem[align * offset_[j]] = val0_[j];
279 refmem[align * offset_[j] + 1] = val1_[j];
280 refmem[align * offset_[j] + 2] = val2_[j];
283 v0 = load<SimdReal>(val0_);
284 v1 = load<SimdReal>(val1_);
285 v2 = load<SimdReal>(val2_);
289 transposeScatterStoreU<3>(mem0_, offset_, v0, v1, v2);
293 transposeScatterStoreU<4>(mem0_, offset_, v0, v1, v2);
300 for (std::size_t j = 0; j < s_workMemSize_; j++)
302 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
307 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3)
310 real refmem[s_workMemSize_];
311 const int nalign = 2;
312 int alignmentList[nalign] = { 3, 4 };
314 FloatingPointTolerance tolerance(defaultRealTolerance());
316 for (i = 0; i < nalign; i++)
318 align = alignmentList[i];
320 // Set test and reference memory to background value
321 for (std::size_t j = 0; j < s_workMemSize_; j++)
323 // Multiply by 1+100*eps to make sure low bits are also used
324 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100 * GMX_REAL_EPS);
327 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
329 // Add values to _reference_ memory (we will then test with mem0_, and compare)
330 refmem[align * offset_[j]] += val0_[j];
331 refmem[align * offset_[j] + 1] += val1_[j];
332 refmem[align * offset_[j] + 2] += val2_[j];
335 v0 = load<SimdReal>(val0_);
336 v1 = load<SimdReal>(val1_);
337 v2 = load<SimdReal>(val2_);
341 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
345 transposeScatterIncrU<4>(mem0_, offset_, v0, v1, v2);
352 for (std::size_t j = 0; j < s_workMemSize_; j++)
354 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
359 TEST_F(SimdFloatingpointUtilTest, transposeScatterIncrU3Overlapping)
362 real refmem[s_workMemSize_];
363 FloatingPointTolerance tolerance(defaultRealTolerance());
365 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
366 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
371 // Set test and reference memory to background value
372 for (std::size_t j = 0; j < s_workMemSize_; j++)
374 // Multiply by 1+100*eps to make sure low bits are also used
375 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100 * GMX_REAL_EPS);
378 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
380 // Add values to _reference_ memory (we will then test with mem0_, and compare)
381 refmem[3 * offset_[j]] += val0_[j];
382 refmem[3 * offset_[j] + 1] += val1_[j];
383 refmem[3 * offset_[j] + 2] += val2_[j];
386 v0 = load<SimdReal>(val0_);
387 v1 = load<SimdReal>(val1_);
388 v2 = load<SimdReal>(val2_);
390 transposeScatterIncrU<3>(mem0_, offset_, v0, v1, v2);
392 for (std::size_t j = 0; j < s_workMemSize_; j++)
394 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
398 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3)
401 real refmem[s_workMemSize_];
402 const int nalign = 2;
403 int alignmentList[nalign] = { 3, 4 };
405 FloatingPointTolerance tolerance(defaultRealTolerance());
407 for (i = 0; i < nalign; i++)
409 align = alignmentList[i];
411 // Set test and reference memory to background value
412 for (std::size_t j = 0; j < s_workMemSize_; j++)
414 // Multiply by 1+100*eps to make sure low bits are also used
415 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100 * GMX_REAL_EPS);
418 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
420 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
421 refmem[align * offset_[j]] -= val0_[j];
422 refmem[align * offset_[j] + 1] -= val1_[j];
423 refmem[align * offset_[j] + 2] -= val2_[j];
426 v0 = load<SimdReal>(val0_);
427 v1 = load<SimdReal>(val1_);
428 v2 = load<SimdReal>(val2_);
432 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
436 transposeScatterDecrU<4>(mem0_, offset_, v0, v1, v2);
443 for (std::size_t j = 0; j < s_workMemSize_; j++)
445 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
450 TEST_F(SimdFloatingpointUtilTest, transposeScatterDecrU3Overlapping)
453 real refmem[s_workMemSize_];
454 FloatingPointTolerance tolerance(defaultRealTolerance());
456 // Alter offset_ to make all entries point to the same (first) value, so all entries will overlap
457 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
462 // Set test and reference memory to background value
463 for (std::size_t j = 0; j < s_workMemSize_; j++)
465 // Multiply by 1+100*eps to make sure low bits are also used
466 mem0_[j] = refmem[j] = (1000.0 + j) * (1.0 + 100 * GMX_REAL_EPS);
469 for (std::size_t j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
471 // Subtract values from _reference_ memory (we will then test with mem0_, and compare)
472 refmem[3 * offset_[j]] -= val0_[j];
473 refmem[3 * offset_[j] + 1] -= val1_[j];
474 refmem[3 * offset_[j] + 2] -= val2_[j];
477 v0 = load<SimdReal>(val0_);
478 v1 = load<SimdReal>(val1_);
479 v2 = load<SimdReal>(val2_);
481 transposeScatterDecrU<3>(mem0_, offset_, v0, v1, v2);
483 for (std::size_t j = 0; j < s_workMemSize_; j++)
485 EXPECT_REAL_EQ_TOL(refmem[j], mem0_[j], tolerance);
489 TEST_F(SimdFloatingpointUtilTest, expandScalarsToTriplets)
491 SimdReal vs, v0, v1, v2;
494 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
499 vs = load<SimdReal>(mem0_);
501 expandScalarsToTriplets(vs, &v0, &v1, &v2);
507 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
509 EXPECT_EQ(i / 3, val0_[i]);
510 EXPECT_EQ((i + GMX_SIMD_REAL_WIDTH) / 3, val1_[i]);
511 EXPECT_EQ((i + 2 * GMX_SIMD_REAL_WIDTH) / 3, val2_[i]);
516 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose4)
518 SimdReal v0, v1, v2, v3;
519 SimdReal ref0, ref1, ref2, ref3;
520 SimdInt32 simdoffset;
521 const int nalign = 3;
522 int alignmentList[nalign] = { 4, 8, 12 };
525 for (i = 0; i < nalign; i++)
527 align = alignmentList[i];
528 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
530 mem0_[align * offset_[j]] = val0_[j];
531 mem0_[align * offset_[j] + 1] = val1_[j];
532 mem0_[align * offset_[j] + 2] = val2_[j];
533 mem0_[align * offset_[j] + 3] = val3_[j];
536 simdoffset = load<SimdInt32>(offset_);
537 ref0 = load<SimdReal>(val0_);
538 ref1 = load<SimdReal>(val1_);
539 ref2 = load<SimdReal>(val2_);
540 ref3 = load<SimdReal>(val3_);
544 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
548 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
550 else if (align == 12)
552 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1, &v2, &v3);
559 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
560 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
561 GMX_EXPECT_SIMD_REAL_EQ(ref2, v2);
562 GMX_EXPECT_SIMD_REAL_EQ(ref3, v3);
567 TEST_F(SimdFloatingpointUtilTest, gatherLoadBySimdIntTranspose2)
571 SimdInt32 simdoffset;
572 const int nalign = 3;
573 int alignmentList[nalign] = { 4, 8, 12 };
576 for (i = 0; i < nalign; i++)
578 align = alignmentList[i];
579 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
581 mem0_[align * offset_[j]] = val0_[j];
582 mem0_[align * offset_[j] + 1] = val1_[j];
585 simdoffset = load<SimdInt32>(offset_);
586 ref0 = load<SimdReal>(val0_);
587 ref1 = load<SimdReal>(val1_);
591 gatherLoadBySimdIntTranspose<4>(mem0_, simdoffset, &v0, &v1);
595 gatherLoadBySimdIntTranspose<8>(mem0_, simdoffset, &v0, &v1);
597 else if (align == 12)
599 gatherLoadBySimdIntTranspose<12>(mem0_, simdoffset, &v0, &v1);
606 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
607 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
611 # if GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
612 TEST_F(SimdFloatingpointUtilTest, gatherLoadUBySimdIntTranspose2)
616 SimdInt32 simdoffset;
617 const int nalign = 3;
618 int alignmentList[nalign] = { 1, 3, 5 };
621 for (i = 0; i < nalign; i++)
623 align = alignmentList[i];
624 for (j = 0; j < GMX_SIMD_REAL_WIDTH; j++)
626 mem0_[align * offset_[j]] = val0_[j];
627 mem0_[align * offset_[j] + 1] = val1_[j];
630 simdoffset = load<SimdInt32>(offset_);
631 ref0 = load<SimdReal>(val0_);
632 ref1 = load<SimdReal>(val1_);
636 gatherLoadUBySimdIntTranspose<1>(mem0_, simdoffset, &v0, &v1);
640 gatherLoadUBySimdIntTranspose<3>(mem0_, simdoffset, &v0, &v1);
644 gatherLoadUBySimdIntTranspose<5>(mem0_, simdoffset, &v0, &v1);
651 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
652 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
655 # endif // GMX_SIMD_HAVE_GATHER_LOADU_BYSIMDINT_TRANSPOSE_REAL
657 TEST_F(SimdFloatingpointUtilTest, reduceIncr4Sum)
660 SimdReal v0, v1, v2, v3;
661 real sum0, sum1, sum2, sum3, tstsum;
662 FloatingPointTolerance tolerance(defaultRealTolerance());
664 v0 = load<SimdReal>(val0_);
665 v1 = load<SimdReal>(val1_);
666 v2 = load<SimdReal>(val2_);
667 v3 = load<SimdReal>(val3_);
669 sum0 = sum1 = sum2 = sum3 = 0;
670 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
678 // Just put some numbers in memory so we check the addition is correct
684 tstsum = reduceIncr4ReturnSum(mem0_, v0, v1, v2, v3);
686 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
687 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
688 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
689 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
691 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
694 # if GMX_SIMD_HAVE_HSIMD_UTIL_REAL
696 TEST_F(SimdFloatingpointUtilTest, loadDualHsimd)
700 // Point p to the upper half of val0_
701 real* p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
703 v0 = load<SimdReal>(val0_);
704 v1 = loadDualHsimd(val0_, p);
706 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
709 TEST_F(SimdFloatingpointUtilTest, loadDuplicateHsimd)
713 // Point p to the upper half of val0_
714 real* p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
715 // Copy data so upper half is identical to lower
716 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
721 v0 = load<SimdReal>(val0_);
722 v1 = loadDuplicateHsimd(val0_);
724 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
728 TEST_F(SimdFloatingpointUtilTest, loadU1DualHsimd)
732 real data[2] = { 1, 2 };
734 // Point p to the upper half of val0_
735 real* p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
736 // Set all low elements to data[0], an high to data[1]
737 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
743 v0 = load<SimdReal>(val0_);
744 v1 = loadU1DualHsimd(data);
746 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
750 TEST_F(SimdFloatingpointUtilTest, storeDualHsimd)
755 // Point p to the upper half of val0_
756 real* p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
758 v0 = load<SimdReal>(val2_);
759 storeDualHsimd(val0_, p, v0);
761 for (i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
763 EXPECT_EQ(val2_[i], val0_[i]);
767 TEST_F(SimdFloatingpointUtilTest, incrDualHsimd)
769 real reference[GMX_SIMD_REAL_WIDTH];
772 // Create reference values
773 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
775 reference[i] = val0_[i] + val2_[i];
778 // Point p to the upper half of val0_
779 real* p = val0_ + GMX_SIMD_REAL_WIDTH / 2;
781 v0 = load<SimdReal>(val2_);
782 incrDualHsimd(val0_, p, v0);
784 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH; i++)
786 EXPECT_EQ(reference[i], val0_[i]);
790 TEST_F(SimdFloatingpointUtilTest, incrDualHsimdOverlapping)
792 real reference[GMX_SIMD_REAL_WIDTH / 2];
795 // Create reference values
796 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
798 reference[i] = val0_[i] + val2_[i] + val2_[GMX_SIMD_REAL_WIDTH / 2 + i];
801 v0 = load<SimdReal>(val2_);
802 incrDualHsimd(val0_, val0_, v0);
804 for (std::size_t i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
806 EXPECT_EQ(reference[i], val0_[i]);
810 TEST_F(SimdFloatingpointUtilTest, decr3Hsimd)
813 real ref[3 * GMX_SIMD_REAL_WIDTH / 2];
815 FloatingPointTolerance tolerance(defaultRealTolerance());
817 // Point p to the upper half of val1_
818 real* p = val1_ + GMX_SIMD_REAL_WIDTH / 2;
819 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
821 ref[i] = val0_[i] - (val1_[i] + p[i]);
823 p = val2_ + GMX_SIMD_REAL_WIDTH / 2;
824 for (j = 0; j < GMX_SIMD_REAL_WIDTH / 2; i++, j++)
826 ref[i] = val0_[i] - (val2_[j] + p[j]);
828 p = val3_ + GMX_SIMD_REAL_WIDTH / 2;
829 for (j = 0; j < GMX_SIMD_REAL_WIDTH / 2; i++, j++)
831 ref[i] = val0_[i] - (val3_[j] + p[j]);
834 v0 = load<SimdReal>(val1_);
835 v1 = load<SimdReal>(val2_);
836 v2 = load<SimdReal>(val3_);
837 decr3Hsimd(val0_, v0, v1, v2);
839 for (i = 0; i < 3 * GMX_SIMD_REAL_WIDTH / 2; i++)
841 EXPECT_REAL_EQ_TOL(ref[i], val0_[i], tolerance);
846 TEST_F(SimdFloatingpointUtilTest, gatherLoadTranspose2Hsimd)
851 const int nalign = 3;
852 int alignmentList[nalign] = { 2, 4, c_simdBestPairAlignment };
855 for (i = 0; i < nalign; i++)
857 align = alignmentList[i];
858 for (j = 0; j < GMX_SIMD_REAL_WIDTH / 2; j++)
860 // Use mem0_ as base for lower half
861 mem0_[align * offset_[j]] = val0_[j];
862 mem0_[align * offset_[j] + 1] = val1_[j];
863 // Use mem1_ as base for upper half
864 mem1_[align * offset_[j]] = val0_[GMX_SIMD_REAL_WIDTH / 2 + j];
865 mem1_[align * offset_[j] + 1] = val1_[GMX_SIMD_REAL_WIDTH / 2 + j];
868 ref0 = load<SimdReal>(val0_);
869 ref1 = load<SimdReal>(val1_);
873 gatherLoadTransposeHsimd<2>(mem0_, mem1_, offset_, &v0, &v1);
877 gatherLoadTransposeHsimd<4>(mem0_, mem1_, offset_, &v0, &v1);
879 else if (align == c_simdBestPairAlignment)
881 gatherLoadTransposeHsimd<c_simdBestPairAlignment>(mem0_, mem1_, offset_, &v0, &v1);
888 GMX_EXPECT_SIMD_REAL_EQ(ref0, v0);
889 GMX_EXPECT_SIMD_REAL_EQ(ref1, v1);
894 TEST_F(SimdFloatingpointUtilTest, reduceIncr4SumHsimd)
898 real sum0, sum1, sum2, sum3, tstsum;
899 FloatingPointTolerance tolerance(defaultRealTolerance());
901 // Use the half-SIMD storage in memory val0_ and val1_.
902 v0 = load<SimdReal>(val0_);
903 v1 = load<SimdReal>(val1_);
905 sum0 = sum1 = sum2 = sum3 = 0;
906 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 2; i++)
909 sum1 += val0_[GMX_SIMD_REAL_WIDTH / 2 + i];
911 sum3 += val1_[GMX_SIMD_REAL_WIDTH / 2 + i];
914 // Just put some numbers in memory so we check the addition is correct
920 tstsum = reduceIncr4ReturnSumHsimd(mem0_, v0, v1);
922 EXPECT_REAL_EQ_TOL(c0 + sum0, mem0_[0], tolerance);
923 EXPECT_REAL_EQ_TOL(c1 + sum1, mem0_[1], tolerance);
924 EXPECT_REAL_EQ_TOL(c2 + sum2, mem0_[2], tolerance);
925 EXPECT_REAL_EQ_TOL(c3 + sum3, mem0_[3], tolerance);
927 EXPECT_REAL_EQ_TOL(sum0 + sum1 + sum2 + sum3, tstsum, tolerance);
930 # endif // GMX_SIMD_HAVE_HSIMD_UTIL_REAL
// These tests currently do not work for GMX_SIMD_REAL_WIDTH < 4. This should be fixed by having a GMX_EXPECT_SIMD_REAL_EQ that works for both Simd and Simd4
933 # if GMX_SIMD_HAVE_4NSIMD_UTIL_REAL && GMX_SIMD_REAL_WIDTH >= 4
935 TEST_F(SimdFloatingpointUtilTest, loadUNDuplicate4)
939 real data[GMX_SIMD_REAL_WIDTH / 4];
940 std::iota(data, data + GMX_SIMD_REAL_WIDTH / 4, 1);
942 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
944 val0_[i * 4] = val0_[i * 4 + 1] = val0_[i * 4 + 2] = val0_[i * 4 + 3] = data[i];
947 v0 = load<Simd4NReal>(val0_);
948 v1 = loadUNDuplicate4(data);
950 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
953 TEST_F(SimdFloatingpointUtilTest, load4DuplicateN)
957 real data[4] = { 1, 2, 3, 4 };
959 for (i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
961 val0_[i * 4] = data[0];
962 val0_[i * 4 + 1] = data[1];
963 val0_[i * 4 + 2] = data[2];
964 val0_[i * 4 + 3] = data[3];
967 v0 = load<Simd4NReal>(val0_);
968 v1 = load4DuplicateN(val0_);
970 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
973 TEST_F(SimdFloatingpointUtilTest, loadU4NOffset)
975 constexpr int offset = 6; // non power of 2
976 constexpr int dataLen = 4 + offset * (GMX_SIMD_REAL_WIDTH / 4 - 1);
978 std::iota(data, data + dataLen, 1);
980 for (int i = 0; i < GMX_SIMD_REAL_WIDTH / 4; i++)
982 val0_[i * 4] = data[0 + offset * i];
983 val0_[i * 4 + 1] = data[1 + offset * i];
984 val0_[i * 4 + 2] = data[2 + offset * i];
985 val0_[i * 4 + 3] = data[3 + offset * i];
988 const Simd4NReal v0 = load<Simd4NReal>(val0_);
989 const Simd4NReal v1 = loadU4NOffset(data, offset);
991 GMX_EXPECT_SIMD_REAL_EQ(v0, v1);
994 # endif // GMX_SIMD_HAVE_4NSIMD_UTIL_REAL
996 #endif // GMX_SIMD_HAVE_REAL