2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Tests utilities for fft calculations.
39 * \author Gaurav Garg <gaugarg@nvidia.com>
44 #include "gromacs/fft/fft.h"
52 #include <gmock/gmock.h>
53 #include <gtest/gtest.h>
55 #include "gromacs/fft/gpu_3dfft.h"
56 #include "gromacs/gpu_utils/clfftinitializer.h"
58 # include "gromacs/gpu_utils/devicebuffer.h"
60 #include "gromacs/utility/stringutil.h"
62 #include "testutils/refdata.h"
63 #include "testutils/mpitest.h"
64 #include "testutils/test_hardware_environment.h"
65 #include "testutils/testasserts.h"
66 #include "testutils/testmatchers.h"
// Parameter tuple for the GPU FFT tests. runTest() unpacks it with std::tie as
// (realGridSizeFull, numDomainsX, numDomainsY, backend).
72 using GpuFftTestParams = std::tuple<IVec, // size of grid
77 /*! \brief Check that the real grid after forward and backward
78 * 3D transforms matches the input real grid.
 *
 * \param[in]     realGridSizeFull     Full grid size; only the product of its three
 *                                     components is used, as the normalization divisor.
 * \param[in]     realGridSize         Extents of the data that is compared:
 *                                     realGridSize[XX] * realGridSize[YY] rows are visited.
 * \param[in]     realGridSizePadded   Padded extents; only the ZZ component is read here,
 *                                     as the stride between consecutive rows.
 * \param[in]     inputRealGrid        Reference values (the data before the transforms).
 * \param[in,out] outputRealGridValues Values after the round trip; they are scaled in place
 *                                     by the normalization constant before being compared.
 */
79 static void checkRealGrid(const IVec realGridSizeFull,
80 const ivec realGridSize,
81 const ivec realGridSizePadded,
82 ArrayRef<const real> inputRealGrid,
83 ArrayRef<real> outputRealGridValues)
85 // Normalize the output (as the implementation does not
86 // normalize either FFT)
87 const real normalizationConstant =
88 1.0 / (realGridSizeFull[XX] * realGridSizeFull[YY] * realGridSizeFull[ZZ]);
// The transform writes back into outputRealGridValues, so the caller's buffer
// holds the normalized values once this function returns.
89 std::transform(outputRealGridValues.begin(),
90 outputRealGridValues.end(),
91 outputRealGridValues.begin(),
92 [normalizationConstant](const real r) { return r * normalizationConstant; });
93 // Check the real grid, skipping unused data from the padding
94 const auto realGridTolerance = relativeToleranceAsFloatingPoint(10, 1e-6);
// Row i starts at offset i * realGridSizePadded[ZZ] in both buffers. The
// reference span covers realGridSize[ZZ] values from that offset, so the
// trailing padding entries of each row are never compared.
95 for (int i = 0; i < realGridSize[XX] * realGridSize[YY]; i++)
98 arrayRefFromArray(inputRealGrid.data() + i * realGridSizePadded[ZZ], realGridSize[ZZ]);
99 auto actual = arrayRefFromArray(outputRealGridValues.data() + i * realGridSizePadded[ZZ],
// Non-fatal expectation: a mismatching row is reported together with its
// index and the remaining rows are still checked.
101 EXPECT_THAT(actual, Pointwise(RealEq(realGridTolerance), expected))
102 << formatString("checking backward transform part %d", i);
// Parameterized googletest fixture (::testing::Test + WithParamInterface) whose
// parameter is a GpuFftTestParams tuple. runTest() pushes random real data
// through a forward and then a backward GPU 3D FFT and checks that the input
// is recovered.
106 class GpuFftTest3D : public ::testing::Test, public ::testing::WithParamInterface<GpuFftTestParams>
109 GpuFftTest3D() = default;
112 //! The whole logic being tested is contained here
113 static void runTest(const GpuFftTestParams& param)
115 const auto& deviceList = getTestHardwareEnvironment()->getTestDeviceList();
// The MPI rank selects the test device. Taking the rank modulo the number of
// devices maps every rank onto a valid index of deviceList.
// NOTE(review): an empty device list would make this a modulo by zero;
// presumably the test harness guarantees at least one device - confirm.
118 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
120 const auto& testDevice = deviceList[rank % deviceList.size()];
122 const DeviceContext& deviceContext = testDevice->deviceContext();
123 setActiveDevice(testDevice->deviceInfo());
124 const DeviceStream& deviceStream = testDevice->deviceStream();
// Unpack the test parameters: full grid size, number of domains along X and
// along Y, and the FFT backend.
130 IVec realGridSizeFull;
131 std::tie(realGridSizeFull, numDomainsX, numDomainsY, backend) = param;
133 // define local grid sizes - this follows same logic as GROMACS implementation
// Domain i gets (i + 1) * N / D - i * N / D grid lines (integer division), so
// the per-domain sizes add up to N and differ from each other by at most one.
// Each domain must own at least one grid line, which ASSERT_GT enforces.
134 std::vector<int> localGridSizesX(numDomainsX);
135 for (unsigned int i = 0; i < localGridSizesX.size(); ++i)
137 localGridSizesX[i] = ((i + 1) * realGridSizeFull[XX] / numDomainsX)
138 - (i * realGridSizeFull[XX] / numDomainsX);
139 ASSERT_GT(localGridSizesX[i], 0);
// Same split as above, applied to the Y extent.
142 std::vector<int> localGridSizesY(numDomainsY);
143 for (unsigned int i = 0; i < localGridSizesY.size(); ++i)
145 localGridSizesY[i] = ((i + 1) * realGridSizeFull[YY] / numDomainsY)
146 - (i * realGridSizeFull[YY] / numDomainsY);
147 ASSERT_GT(localGridSizesY[i], 0);
151 ivec realGridSizePadded;
152 ivec complexGridSizePadded;
154 // Allocate the device buffers
155 DeviceBuffer<float> realGrid, complexGrid;
157 const bool performOutOfPlaceFFT = true;
158 const MPI_Comm comm = MPI_COMM_WORLD;
159 const bool allocateGrid = true;
160 const int nz = realGridSizeFull[ZZ];
161 Gpu3dFft gpu3dFft(backend,
167 performOutOfPlaceFFT,
172 complexGridSizePadded,
// Number of reals in the padded real grid: the product of the three padded extents.
176 int sizeInReals = realGridSizePadded[0] * realGridSizePadded[1] * realGridSizePadded[2];
178 // initialize random input data
179 std::vector<real> in(sizeInReals);
180 std::uniform_real_distribution<> dis(-10.0f, 10.0f);
// NOTE(review): the seed is the wall-clock time plus the rank, so the input
// data changes from run to run and a failure may not be reproducible;
// consider a fixed per-rank seed.
181 std::minstd_rand gen(time(NULL) + rank);
182 std::generate(in.begin(), in.end(), [&dis, &gen]() {
183 // random number between -10 to 10
187 // Transfer the real grid input data for the FFT
189 &realGrid, in.data(), 0, in.size(), deviceStream, GpuApiCallBehavior::Sync, nullptr);
191 // Do the forward FFT to compute the complex grid
// timingEvent stays nullptr and is passed unchanged to both transforms.
192 CommandEvent* timingEvent = nullptr;
193 gpu3dFft.perform3dFft(GMX_FFT_REAL_TO_COMPLEX, timingEvent);
195 // clear real grid after the forward FFT, so that we know the
196 // final grid is one produced by the complex FFT, not just leftovers
197 clearDeviceBufferAsync(&realGrid, 0, sizeInReals, deviceStream);
199 // Do the back transform
200 gpu3dFft.perform3dFft(GMX_FFT_COMPLEX_TO_REAL, timingEvent);
201 deviceStream.synchronize();
203 // Transfer the real grid back from the device
// NOTE(review): the input vector holds `real` while this output vector and the
// device buffers hold `float`; this presumably relies on real being float in
// the builds where this test is compiled - confirm.
204 std::vector<float> outputRealGridValues(in.size());
205 copyFromDeviceBuffer(outputRealGridValues.data(),
208 outputRealGridValues.size(),
210 GpuApiCallBehavior::Sync,
// checkRealGrid() normalizes outputRealGridValues in place and compares it
// against the original input, skipping the padding.
213 checkRealGrid(realGridSizeFull, realGridSize, realGridSizePadded, in, outputRealGridValues);
// Value-parameterized test case on the GpuFftTest3D fixture; it fetches the
// parameter tuple of the current instantiation through GetParam().
217 TEST_P(GpuFftTest3D, GpuFftDecomposition)
220 GpuFftTestParams params = GetParam();
// Parameter sets for the test suite. Both use a 5 x 6 x 9 grid with the
// HeFFTe_CUDA backend; they differ only in the number of domains along X and Y
// (4 x 1 and 2 x 2).
224 std::vector<GpuFftTestParams> const inputs{
225 { IVec{ 5, 6, 9 }, 4, 1, FftBackend::HeFFTe_CUDA}, // slab decomposition
226 { IVec{ 5, 6, 9 }, 2, 2, FftBackend::HeFFTe_CUDA} // pencil decomposition
// Instantiates the GpuFftTest3D tests once per entry of inputs, under the
// instantiation name "GpuFft".
229 INSTANTIATE_TEST_SUITE_P(GpuFft, GpuFftTest3D, ::testing::ValuesIn(inputs));