17721021961a306aee94a5133664ded2bc443646
[alexxy/gromacs.git] / src / gromacs / fft / tests / fft_mpi.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*! \internal \file
36  * \brief
37  * Tests for GPU 3D FFT calculations on grids decomposed over MPI ranks.
38  *
39  * \author Gaurav Garg <gaugarg@nvidia.com>
40  * \ingroup module_fft
41  */
42 #include "gmxpre.h"
43
44 #include "gromacs/fft/fft.h"
45
46 #include "config.h"
47
48 #include <algorithm>
49 #include <vector>
50 #include <random>
51
52 #include <gmock/gmock.h>
53 #include <gtest/gtest.h>
54
55 #include "gromacs/fft/gpu_3dfft.h"
56 #include "gromacs/gpu_utils/clfftinitializer.h"
57 #if GMX_GPU
58 #    include "gromacs/gpu_utils/devicebuffer.h"
59 #endif
60 #include "gromacs/utility/stringutil.h"
61
62 #include "testutils/refdata.h"
63 #include "testutils/mpitest.h"
64 #include "testutils/test_hardware_environment.h"
65 #include "testutils/testasserts.h"
66 #include "testutils/testmatchers.h"
67
68 namespace gmx
69 {
70 namespace test
71 {
72 using GpuFftTestParams = std::tuple<IVec, // size of grid
73                                     int,  // domains in x
74                                     int,  // domains in y
75                                     FftBackend>;
76
77 /*! \brief Check that the real grid after forward and backward
78  * 3D transforms matches the input real grid. */
79 static void checkRealGrid(const IVec           realGridSizeFull,
80                           const ivec           realGridSize,
81                           const ivec           realGridSizePadded,
82                           ArrayRef<const real> inputRealGrid,
83                           ArrayRef<real>       outputRealGridValues)
84 {
85     // Normalize the output (as the implementation does not
86     // normalize either FFT)
87     const real normalizationConstant =
88             1.0 / (realGridSizeFull[XX] * realGridSizeFull[YY] * realGridSizeFull[ZZ]);
89     std::transform(outputRealGridValues.begin(),
90                    outputRealGridValues.end(),
91                    outputRealGridValues.begin(),
92                    [normalizationConstant](const real r) { return r * normalizationConstant; });
93     // Check the real grid, skipping unused data from the padding
94     const auto realGridTolerance = relativeToleranceAsFloatingPoint(10, 1e-6);
95     for (int i = 0; i < realGridSize[XX] * realGridSize[YY]; i++)
96     {
97         auto expected =
98                 arrayRefFromArray(inputRealGrid.data() + i * realGridSizePadded[ZZ], realGridSize[ZZ]);
99         auto actual = arrayRefFromArray(outputRealGridValues.data() + i * realGridSizePadded[ZZ],
100                                         realGridSize[ZZ]);
101         EXPECT_THAT(actual, Pointwise(RealEq(realGridTolerance), expected))
102                 << formatString("checking backward transform part %d", i);
103     }
104 }
105
106 class GpuFftTest3D : public ::testing::Test, public ::testing::WithParamInterface<GpuFftTestParams>
107 {
108 public:
109     GpuFftTest3D() = default;
110
111
112     //! The whole logic being tested is contained here
113     static void runTest(const GpuFftTestParams& param)
114     {
115         const auto& deviceList = getTestHardwareEnvironment()->getTestDeviceList();
116
117         int rank;
118         MPI_Comm_rank(MPI_COMM_WORLD, &rank);
119
120         const auto& testDevice = deviceList[rank % deviceList.size()];
121
122         const DeviceContext& deviceContext = testDevice->deviceContext();
123         setActiveDevice(testDevice->deviceInfo());
124         const DeviceStream& deviceStream = testDevice->deviceStream();
125
126         FftBackend backend;
127
128         int  numDomainsX;
129         int  numDomainsY;
130         IVec realGridSizeFull;
131         std::tie(realGridSizeFull, numDomainsX, numDomainsY, backend) = param;
132
133         // define local grid sizes - this follows same logic as GROMACS implementation
134         std::vector<int> localGridSizesX(numDomainsX);
135         for (unsigned int i = 0; i < localGridSizesX.size(); ++i)
136         {
137             localGridSizesX[i] = ((i + 1) * realGridSizeFull[XX] / numDomainsX)
138                                  - (i * realGridSizeFull[XX] / numDomainsX);
139             ASSERT_GT(localGridSizesX[i], 0);
140         }
141
142         std::vector<int> localGridSizesY(numDomainsY);
143         for (unsigned int i = 0; i < localGridSizesY.size(); ++i)
144         {
145             localGridSizesY[i] = ((i + 1) * realGridSizeFull[YY] / numDomainsY)
146                                  - (i * realGridSizeFull[YY] / numDomainsY);
147             ASSERT_GT(localGridSizesY[i], 0);
148         }
149
150         ivec realGridSize;
151         ivec realGridSizePadded;
152         ivec complexGridSizePadded;
153
154         // Allocate the device buffers
155         DeviceBuffer<float> realGrid, complexGrid;
156
157         const bool     performOutOfPlaceFFT = true;
158         const MPI_Comm comm                 = MPI_COMM_WORLD;
159         const bool     allocateGrid         = true;
160         const int      nz                   = realGridSizeFull[ZZ];
161         Gpu3dFft       gpu3dFft(backend,
162                           allocateGrid,
163                           comm,
164                           localGridSizesX,
165                           localGridSizesY,
166                           nz,
167                           performOutOfPlaceFFT,
168                           deviceContext,
169                           deviceStream,
170                           realGridSize,
171                           realGridSizePadded,
172                           complexGridSizePadded,
173                           &realGrid,
174                           &complexGrid);
175
176         int sizeInReals = realGridSizePadded[0] * realGridSizePadded[1] * realGridSizePadded[2];
177
178         // initialze random input data
179         std::vector<real>                in(sizeInReals);
180         std::uniform_real_distribution<> dis(-10.0f, 10.0f);
181         std::minstd_rand                 gen(time(NULL) + rank);
182         std::generate(in.begin(), in.end(), [&dis, &gen]() {
183             // random number between -10 to 10
184             return dis(gen);
185         });
186
187         // Transfer the real grid input data for the FFT
188         copyToDeviceBuffer(
189                 &realGrid, in.data(), 0, in.size(), deviceStream, GpuApiCallBehavior::Sync, nullptr);
190
191         // Do the forward FFT to compute the complex grid
192         CommandEvent* timingEvent = nullptr;
193         gpu3dFft.perform3dFft(GMX_FFT_REAL_TO_COMPLEX, timingEvent);
194
195         // clear real grid after the forward FFT, so that we know the
196         // final grid is one produced by the complex FFT, not just leftovers
197         clearDeviceBufferAsync(&realGrid, 0, sizeInReals, deviceStream);
198
199         // Do the back transform
200         gpu3dFft.perform3dFft(GMX_FFT_COMPLEX_TO_REAL, timingEvent);
201         deviceStream.synchronize();
202
203         // Transfer the real grid back from the device
204         std::vector<float> outputRealGridValues(in.size());
205         copyFromDeviceBuffer(outputRealGridValues.data(),
206                              &realGrid,
207                              0,
208                              outputRealGridValues.size(),
209                              deviceStream,
210                              GpuApiCallBehavior::Sync,
211                              nullptr);
212
213         checkRealGrid(realGridSizeFull, realGridSize, realGridSizePadded, in, outputRealGridValues);
214     }
215 };
216
217 TEST_P(GpuFftTest3D, GpuFftDecomposition)
218 {
219     GMX_MPI_TEST(4);
220     GpuFftTestParams params = GetParam();
221     runTest(params);
222 }
223
224 std::vector<GpuFftTestParams> const inputs{
225     { IVec{ 5, 6, 9 }, 4, 1, FftBackend::HeFFTe_CUDA}, // slab decomposition
226     { IVec{ 5, 6, 9 }, 2, 2, FftBackend::HeFFTe_CUDA} // pencil decomposition
227 };
228
229 INSTANTIATE_TEST_SUITE_P(GpuFft, GpuFftTest3D, ::testing::ValuesIn(inputs));
230
231 } // namespace test
232 } // namespace gmx