8cddef033da962e11e12f07fadd4d74ccad09ec7
[alexxy/gromacs.git] / src / gromacs / gpu_utils / tests / device_buffer.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35 /*! \internal \file
36  * \brief Tests for device buffer
37  *
38  * \author Artem Zhmurov <zhmurov@gmail.com>
39  *
40  * \ingroup module_gpu_utils
41  */
42 #include "gmxpre.h"
43
44 #include "config.h"
45
46 #if GMX_GPU
47 #    include <numeric>
48
49 #    include <gmock/gmock.h>
50 #    include <gtest/gtest.h>
51
52 #    include "gromacs/gpu_utils/device_context.h"
53 #    include "gromacs/gpu_utils/device_stream.h"
54 #    include "gromacs/gpu_utils/devicebuffer.h"
55 #    include "gromacs/gpu_utils/hostallocator.h"
56
57 #    include "testutils/test_hardware_environment.h"
58 #    include "testutils/testasserts.h"
59
60 namespace gmx
61 {
62
63 template<typename ValueType>
64 BasicVector<ValueType>& operator++(BasicVector<ValueType>& in)
65 {
66     in[XX]++;
67     in[YY]++;
68     in[ZZ]++;
69     return in;
70 }
71
72 template<typename ValueType>
73 BasicVector<ValueType>& operator++(BasicVector<ValueType>& in, int /* n */)
74 {
75     BasicVector<ValueType> temp = *in;
76     ++*in;
77     return temp;
78 }
79
80 template<typename ValueType>
81 inline bool operator==(const BasicVector<ValueType>& lhs, const BasicVector<ValueType>& rhs)
82 {
83     return lhs[XX] == rhs[XX] && lhs[YY] == rhs[YY] && lhs[ZZ] == rhs[ZZ];
84 }
85
86 namespace test
87 {
88
89 namespace
90 {
91
92 using testing::Eq;
93 using testing::Pointwise;
94
95 //! Test fixture (needed for typed tests)
96 template<typename T>
97 class DeviceBufferTest : public ::testing::Test
98 {
99 };
100
101 using TypeParamList = testing::Types<short, int, float, double, gmx::RVec>;
102 TYPED_TEST_CASE(DeviceBufferTest, TypeParamList);
103
104 TYPED_TEST(DeviceBufferTest, CanAllocateAndFreeDeviceBuffer)
105 {
106     for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
107     {
108         const DeviceContext& deviceContext = testDevice->deviceContext();
109         setActiveDevice(testDevice->deviceInfo());
110
111         DeviceBuffer<TypeParam> buffer;
112         int                     numValues = 123;
113         allocateDeviceBuffer(&buffer, numValues, deviceContext);
114         freeDeviceBuffer(&buffer);
115     }
116 }
117
118 TYPED_TEST(DeviceBufferTest, CanReallocateAndFreeDeviceBuffer)
119 {
120     for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
121     {
122         const DeviceContext& deviceContext = testDevice->deviceContext();
123         setActiveDevice(testDevice->deviceInfo());
124
125         DeviceBuffer<TypeParam> buffer;
126         int                     currentNumValues    = 456;
127         int                     newNumValues        = 789;
128         int                     currentMaxNumValues = 0;
129         allocateDeviceBuffer(&buffer, currentNumValues, deviceContext);
130         reallocateDeviceBuffer(&buffer, newNumValues, &currentNumValues, &currentMaxNumValues, deviceContext);
131         freeDeviceBuffer(&buffer);
132     }
133 }
134
/*! \brief First value of the sequence written into buffers of scalar type.
 *
 * The tests pass this to \c std::iota as the starting value.
 *
 * \tparam T  Scalar element type; the value is one converted to \c T.
 */
template<typename T>
const T c_initialValue = static_cast<T>(1);
138
139 //! Initial value to fill the buffer of the vector type
140 template<>
141 const gmx::RVec c_initialValue<gmx::RVec> = { 1, -2, 3 };
142
143
144 TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDevice)
145 {
146     for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async })
147     {
148         PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async)
149                                               ? PinningPolicy::PinnedIfSupported
150                                               : PinningPolicy::CannotBePinned;
151         for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
152         {
153             const DeviceContext& deviceContext = testDevice->deviceContext();
154             const DeviceStream&  deviceStream  = testDevice->deviceStream();
155             setActiveDevice(testDevice->deviceInfo());
156
157             DeviceBuffer<TypeParam> buffer;
158             int                     numValues = 123;
159             allocateDeviceBuffer(&buffer, numValues, deviceContext);
160             HostVector<TypeParam> valuesIn(numValues, { pinningPolicy });
161             HostVector<TypeParam> valuesOut(numValues, { pinningPolicy });
162
163             std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
164
165             copyToDeviceBuffer(&buffer, valuesIn.data(), 0, numValues, deviceStream, transferKind, nullptr);
166             copyFromDeviceBuffer(
167                     valuesOut.data(), &buffer, 0, numValues, deviceStream, transferKind, nullptr);
168             if (transferKind == GpuApiCallBehavior::Async)
169             {
170                 deviceStream.synchronize();
171             }
172             EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
173                     << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy.";
174             freeDeviceBuffer(&buffer);
175         }
176     }
177 }
178
179 TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDeviceWithOffset)
180 {
181     for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async })
182     {
183         PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async)
184                                               ? PinningPolicy::PinnedIfSupported
185                                               : PinningPolicy::CannotBePinned;
186         for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
187         {
188             const DeviceContext& deviceContext = testDevice->deviceContext();
189             const DeviceStream&  deviceStream  = testDevice->deviceStream();
190             setActiveDevice(testDevice->deviceInfo());
191
192             DeviceBuffer<TypeParam> buffer;
193             int                     numValues = 123;
194             allocateDeviceBuffer(&buffer, 2 * numValues, deviceContext);
195             HostVector<TypeParam> valuesIn(numValues, { pinningPolicy });
196             HostVector<TypeParam> valuesOut(2 * numValues, { pinningPolicy });
197
198             std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
199
200             // Fill the buffer with two copies of valuesIn, one after the other.
201             copyToDeviceBuffer(&buffer, valuesIn.data(), 0, numValues, deviceStream, transferKind, nullptr);
202             copyToDeviceBuffer(
203                     &buffer, valuesIn.data(), numValues, numValues, deviceStream, transferKind, nullptr);
204             // Wait until GPU is done andd o the same copying on the CPU, so we can test it works correctly.
205             if (transferKind == GpuApiCallBehavior::Async)
206             {
207                 deviceStream.synchronize();
208             }
209             valuesIn.insert(valuesIn.end(), valuesIn.begin(), valuesIn.end());
210
211             copyFromDeviceBuffer(
212                     valuesOut.data(), &buffer, 0, 2 * numValues, deviceStream, transferKind, nullptr);
213             if (transferKind == GpuApiCallBehavior::Async)
214             {
215                 deviceStream.synchronize();
216             }
217             EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
218                     << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy.";
219
220             SCOPED_TRACE("Checking the copy respects the output range");
221
222             // Remove the first element, and push another copy of the last
223             // element, so we can check that a copy of all of the data
224             // skipping the first element correctly over-writes exactly
225             // all but one of the old values.
226             valuesIn.erase(valuesIn.begin());
227             valuesIn.push_back(valuesIn.back());
228             copyFromDeviceBuffer(
229                     valuesOut.data(), &buffer, 1, 2 * numValues - 1, deviceStream, transferKind, nullptr);
230             if (transferKind == GpuApiCallBehavior::Async)
231             {
232                 deviceStream.synchronize();
233             }
234             EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
235                     << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy.";
236         }
237     }
238 }
239
240 #    if GMX_GPU_CUDA
241
242 TYPED_TEST(DeviceBufferTest, CanCopyBetweenDeviceBuffers)
243 {
244     for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async })
245     {
246         PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async)
247                                               ? PinningPolicy::PinnedIfSupported
248                                               : PinningPolicy::CannotBePinned;
249         for (const auto& testDeviceIn : getTestHardwareEnvironment()->getTestDeviceList())
250         {
251             for (const auto& testDeviceOut : getTestHardwareEnvironment()->getTestDeviceList())
252             {
253                 int                   numValues = 321;
254                 HostVector<TypeParam> valuesIn(numValues, { pinningPolicy });
255                 HostVector<TypeParam> valuesOut(numValues, { pinningPolicy });
256
257                 std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
258
259                 const DeviceContext& deviceContextIn = testDeviceIn->deviceContext();
260                 const DeviceStream&  deviceStreamIn  = testDeviceIn->deviceStream();
261                 setActiveDevice(testDeviceIn->deviceInfo());
262                 DeviceBuffer<TypeParam> bufferIn;
263                 allocateDeviceBuffer(&bufferIn, numValues, deviceContextIn);
264
265                 const DeviceContext& deviceContextOut = testDeviceOut->deviceContext();
266                 const DeviceStream&  deviceStreamOut  = testDeviceOut->deviceStream();
267                 setActiveDevice(testDeviceOut->deviceInfo());
268                 DeviceBuffer<TypeParam> bufferOut;
269                 allocateDeviceBuffer(&bufferOut, numValues, deviceContextOut);
270
271                 copyToDeviceBuffer(
272                         &bufferIn, valuesIn.data(), 0, numValues, deviceStreamIn, transferKind, nullptr);
273                 copyBetweenDeviceBuffers(
274                         &bufferOut, &bufferIn, numValues, deviceStreamIn, transferKind, nullptr);
275                 if (transferKind == GpuApiCallBehavior::Async)
276                 {
277                     deviceStreamIn.synchronize();
278                 }
279                 copyFromDeviceBuffer(
280                         valuesOut.data(), &bufferOut, 0, numValues, deviceStreamOut, transferKind, nullptr);
281                 if (transferKind == GpuApiCallBehavior::Async)
282                 {
283                     deviceStreamOut.synchronize();
284                 }
285                 EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
286                         << "Changed after H2D, D2D and D2H " << enumValueToString(transferKind)
287                         << " copy.";
288                 freeDeviceBuffer(&bufferIn);
289                 freeDeviceBuffer(&bufferOut);
290             }
291         }
292     }
293 }
294
295 #    endif // GMX_GPU_CUDA
296
297
298 } // namespace
299 } // namespace test
300 } // namespace gmx
301
302 #endif // GMX_GPU