From fa2de53af03fe27220c250af938bbc8385312830 Mon Sep 17 00:00:00 2001 From: Artem Zhmurov Date: Wed, 24 Feb 2021 19:24:01 +0000 Subject: [PATCH] Add tests on asynchronous host-to-device copy and back The copy routines take different paths depending on transfer kind, hence both sync and async copy should be tested. This adds tests for async calls. --- src/gromacs/gpu_utils/gpu_utils.cpp | 9 ++ src/gromacs/gpu_utils/gpu_utils.h | 13 +- src/gromacs/gpu_utils/tests/device_buffer.cpp | 144 +++++++++++------- 3 files changed, 105 insertions(+), 61 deletions(-) diff --git a/src/gromacs/gpu_utils/gpu_utils.cpp b/src/gromacs/gpu_utils/gpu_utils.cpp index 663137084f..5fab6b7379 100644 --- a/src/gromacs/gpu_utils/gpu_utils.cpp +++ b/src/gromacs/gpu_utils/gpu_utils.cpp @@ -45,12 +45,21 @@ #include "config.h" #include "gromacs/utility/arrayref.h" +#include "gromacs/utility/enumerationhelpers.h" #include "gromacs/utility/stringutil.h" #ifdef _MSC_VER # pragma warning(disable : 6237) #endif +const char* enumValueToString(GpuApiCallBehavior enumValue) +{ + static constexpr gmx::EnumerationArray s_gpuApiCallBehaviorNames = { + "Synchronous", "Asynchronous" + }; + return s_gpuApiCallBehaviorNames[enumValue]; +} + /*! \brief Help build a descriptive message in \c error if there are * \c errorReasons why nonbondeds on a GPU are not supported. * diff --git a/src/gromacs/gpu_utils/gpu_utils.h b/src/gromacs/gpu_utils/gpu_utils.h index fce1e99580..378015fb30 100644 --- a/src/gromacs/gpu_utils/gpu_utils.h +++ b/src/gromacs/gpu_utils/gpu_utils.h @@ -4,7 +4,7 @@ * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2010, The GROMACS development team. * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team. 
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -60,12 +60,19 @@ class MDLogger; } //! Enum which is only used to describe transfer calls at the moment -enum class GpuApiCallBehavior +enum class GpuApiCallBehavior : int { + //! Synchronous Sync, - Async + //! Asynchronous + Async, + //! Size of the enumeration + Count }; +//! String corresponding to GPU API call behavior +const char* enumValueToString(GpuApiCallBehavior enumValue); + //! Types of actions associated to waiting or checking the completion of GPU tasks enum class GpuTaskCompletion { diff --git a/src/gromacs/gpu_utils/tests/device_buffer.cpp b/src/gromacs/gpu_utils/tests/device_buffer.cpp index 86cfa6bdd5..56bbd91afc 100644 --- a/src/gromacs/gpu_utils/tests/device_buffer.cpp +++ b/src/gromacs/gpu_utils/tests/device_buffer.cpp @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2020, by the GROMACS development team, led by + * Copyright (c) 2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. 
@@ -52,6 +52,7 @@ # include "gromacs/gpu_utils/device_context.h" # include "gromacs/gpu_utils/device_stream.h" # include "gromacs/gpu_utils/devicebuffer.h" +# include "gromacs/gpu_utils/hostallocator.h" # include "testutils/test_hardware_environment.h" # include "testutils/testasserts.h" @@ -142,73 +143,100 @@ const gmx::RVec c_initialValue = { 1, -2, 3 }; TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDevice) { - for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList()) + for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async }) { - const DeviceContext& deviceContext = testDevice->deviceContext(); - const DeviceStream& deviceStream = testDevice->deviceStream(); - setActiveDevice(testDevice->deviceInfo()); - - DeviceBuffer buffer; - int numValues = 123; - allocateDeviceBuffer(&buffer, numValues, deviceContext); - std::vector valuesIn(numValues); - std::vector valuesOut(numValues); - - std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue); - - copyToDeviceBuffer( - &buffer, valuesIn.data(), 0, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr); - copyFromDeviceBuffer( - valuesOut.data(), &buffer, 0, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr); - EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) << "Changed after H2D and D2H copy."; - freeDeviceBuffer(&buffer); + PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async) + ? 
PinningPolicy::PinnedIfSupported + : PinningPolicy::CannotBePinned; + for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList()) + { + const DeviceContext& deviceContext = testDevice->deviceContext(); + const DeviceStream& deviceStream = testDevice->deviceStream(); + setActiveDevice(testDevice->deviceInfo()); + + DeviceBuffer buffer; + int numValues = 123; + allocateDeviceBuffer(&buffer, numValues, deviceContext); + HostVector valuesIn(numValues, { pinningPolicy }); + HostVector valuesOut(numValues, { pinningPolicy }); + + std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue); + + copyToDeviceBuffer(&buffer, valuesIn.data(), 0, numValues, deviceStream, transferKind, nullptr); + copyFromDeviceBuffer( + valuesOut.data(), &buffer, 0, numValues, deviceStream, transferKind, nullptr); + if (transferKind == GpuApiCallBehavior::Async) + { + deviceStream.synchronize(); + } + EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) + << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy."; + freeDeviceBuffer(&buffer); + } } } TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDeviceWithOffset) { - for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList()) + for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async }) { - const DeviceContext& deviceContext = testDevice->deviceContext(); - const DeviceStream& deviceStream = testDevice->deviceStream(); - setActiveDevice(testDevice->deviceInfo()); - - DeviceBuffer buffer; - int numValues = 123; - allocateDeviceBuffer(&buffer, 2 * numValues, deviceContext); - std::vector valuesIn(numValues); - std::vector valuesOut(2 * numValues); - - std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue); - - // Fill the buffer with two copies of valuesIn, one after the other. 
- copyToDeviceBuffer( - &buffer, valuesIn.data(), 0, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr); - copyToDeviceBuffer( - &buffer, valuesIn.data(), numValues, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr); - // Do the same copying on the CPU, so we can test it works - // correctly. - valuesIn.insert(valuesIn.end(), valuesIn.begin(), valuesIn.end()); - - copyFromDeviceBuffer( - valuesOut.data(), &buffer, 0, 2 * numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr); - EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) << "Changed after H2D and D2H copy."; - - SCOPED_TRACE("Checking the copy respects the output range"); - - // Remove the first element, and push another copy of the last - // element, so we can check that a copy of all of the data - // skipping the first element correctly over-writes exactly - // all but one of the old values. - valuesIn.erase(valuesIn.begin()); - valuesIn.push_back(valuesIn.back()); - copyFromDeviceBuffer( - valuesOut.data(), &buffer, 1, 2 * numValues - 1, deviceStream, GpuApiCallBehavior::Sync, nullptr); - EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) << "Changed after H2D and D2H copy."; + PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async) + ? PinningPolicy::PinnedIfSupported + : PinningPolicy::CannotBePinned; + for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList()) + { + const DeviceContext& deviceContext = testDevice->deviceContext(); + const DeviceStream& deviceStream = testDevice->deviceStream(); + setActiveDevice(testDevice->deviceInfo()); + + DeviceBuffer buffer; + int numValues = 123; + allocateDeviceBuffer(&buffer, 2 * numValues, deviceContext); + HostVector valuesIn(numValues, { pinningPolicy }); + HostVector valuesOut(2 * numValues, { pinningPolicy }); + + std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue); + + // Fill the buffer with two copies of valuesIn, one after the other. 
+ copyToDeviceBuffer(&buffer, valuesIn.data(), 0, numValues, deviceStream, transferKind, nullptr); + copyToDeviceBuffer( + &buffer, valuesIn.data(), numValues, numValues, deviceStream, transferKind, nullptr); + // Wait until GPU is done and do the same copying on the CPU, so we can test it works correctly. + if (transferKind == GpuApiCallBehavior::Async) + { + deviceStream.synchronize(); + } + valuesIn.insert(valuesIn.end(), valuesIn.begin(), valuesIn.end()); + + copyFromDeviceBuffer( + valuesOut.data(), &buffer, 0, 2 * numValues, deviceStream, transferKind, nullptr); + if (transferKind == GpuApiCallBehavior::Async) + { + deviceStream.synchronize(); + } + EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) + << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy."; + + SCOPED_TRACE("Checking the copy respects the output range"); + + // Remove the first element, and push another copy of the last + // element, so we can check that a copy of all of the data + // skipping the first element correctly over-writes exactly + // all but one of the old values. + valuesIn.erase(valuesIn.begin()); + valuesIn.push_back(valuesIn.back()); + copyFromDeviceBuffer( + valuesOut.data(), &buffer, 1, 2 * numValues - 1, deviceStream, transferKind, nullptr); + if (transferKind == GpuApiCallBehavior::Async) + { + deviceStream.synchronize(); + } + EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) + << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy."; + } } } - } // namespace } // namespace test } // namespace gmx -- 2.22.0