Add tests on asynchronous host-to-device copy and back
author Artem Zhmurov <zhmurov@gmail.com>
Wed, 24 Feb 2021 19:24:01 +0000 (19:24 +0000)
committer Andrey Alekseenko <al42and@gmail.com>
Wed, 24 Feb 2021 19:24:01 +0000 (19:24 +0000)
The copy routines take different paths depending on transfer kind,
hence both sync and async copy should be tested. This adds tests
for async calls.

src/gromacs/gpu_utils/gpu_utils.cpp
src/gromacs/gpu_utils/gpu_utils.h
src/gromacs/gpu_utils/tests/device_buffer.cpp

index 663137084f0bfaaa85046cf39922f7b4dbf70176..5fab6b73795b5d496d6b35e163695673ea4960d6 100644 (file)
 #include "config.h"
 
 #include "gromacs/utility/arrayref.h"
+#include "gromacs/utility/enumerationhelpers.h"
 #include "gromacs/utility/stringutil.h"
 
 #ifdef _MSC_VER
 #    pragma warning(disable : 6237)
 #endif
 
+const char* enumValueToString(GpuApiCallBehavior enumValue)
+{
+    static constexpr gmx::EnumerationArray<GpuApiCallBehavior, const char*> s_gpuApiCallBehaviorNames = {
+        "Synchronous", "Asynchronous"
+    };
+    return s_gpuApiCallBehaviorNames[enumValue];
+}
+
 /*! \brief Help build a descriptive message in \c error if there are
  * \c errorReasons why nonbondeds on a GPU are not supported.
  *
index fce1e995802450f160f91f91f9fdf38cbb9fdd6d..378015fb309729f3c1ef5886de03cffdf1b358f5 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  * Copyright (c) 2001-2010, The GROMACS development team.
  * Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -60,12 +60,19 @@ class MDLogger;
 }
 
 //! Enum which is only used to describe transfer calls at the moment
-enum class GpuApiCallBehavior
+enum class GpuApiCallBehavior : int
 {
+    //! Synchronous
     Sync,
-    Async
+    //! Asynchronous
+    Async,
+    //! Size of the enumeration
+    Count
 };
 
+//! String corresponding to GPU API call behavior
+const char* enumValueToString(GpuApiCallBehavior enumValue);
+
 //! Types of actions associated to waiting or checking the completion of GPU tasks
 enum class GpuTaskCompletion
 {
index 86cfa6bdd52244089f015f0249712071d55d3801..56bbd91afc223c68c4659d69e7d49befa2a6c04e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
@@ -52,6 +52,7 @@
 #    include "gromacs/gpu_utils/device_context.h"
 #    include "gromacs/gpu_utils/device_stream.h"
 #    include "gromacs/gpu_utils/devicebuffer.h"
+#    include "gromacs/gpu_utils/hostallocator.h"
 
 #    include "testutils/test_hardware_environment.h"
 #    include "testutils/testasserts.h"
@@ -142,73 +143,100 @@ const gmx::RVec c_initialValue<gmx::RVec> = { 1, -2, 3 };
 
 TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDevice)
 {
-    for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
+    for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async })
     {
-        const DeviceContext& deviceContext = testDevice->deviceContext();
-        const DeviceStream&  deviceStream  = testDevice->deviceStream();
-        setActiveDevice(testDevice->deviceInfo());
-
-        DeviceBuffer<TypeParam> buffer;
-        int                     numValues = 123;
-        allocateDeviceBuffer(&buffer, numValues, deviceContext);
-        std::vector<TypeParam> valuesIn(numValues);
-        std::vector<TypeParam> valuesOut(numValues);
-
-        std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
-
-        copyToDeviceBuffer(
-                &buffer, valuesIn.data(), 0, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-        copyFromDeviceBuffer(
-                valuesOut.data(), &buffer, 0, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-        EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) << "Changed after H2D and D2H copy.";
-        freeDeviceBuffer(&buffer);
+        PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async)
+                                              ? PinningPolicy::PinnedIfSupported
+                                              : PinningPolicy::CannotBePinned;
+        for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
+        {
+            const DeviceContext& deviceContext = testDevice->deviceContext();
+            const DeviceStream&  deviceStream  = testDevice->deviceStream();
+            setActiveDevice(testDevice->deviceInfo());
+
+            DeviceBuffer<TypeParam> buffer;
+            int                     numValues = 123;
+            allocateDeviceBuffer(&buffer, numValues, deviceContext);
+            HostVector<TypeParam> valuesIn(numValues, { pinningPolicy });
+            HostVector<TypeParam> valuesOut(numValues, { pinningPolicy });
+
+            std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
+
+            copyToDeviceBuffer(&buffer, valuesIn.data(), 0, numValues, deviceStream, transferKind, nullptr);
+            copyFromDeviceBuffer(
+                    valuesOut.data(), &buffer, 0, numValues, deviceStream, transferKind, nullptr);
+            if (transferKind == GpuApiCallBehavior::Async)
+            {
+                deviceStream.synchronize();
+            }
+            EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
+                    << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy.";
+            freeDeviceBuffer(&buffer);
+        }
     }
 }
 
 TYPED_TEST(DeviceBufferTest, CanCopyToAndFromDeviceWithOffset)
 {
-    for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
+    for (auto transferKind : { GpuApiCallBehavior::Sync, GpuApiCallBehavior::Async })
     {
-        const DeviceContext& deviceContext = testDevice->deviceContext();
-        const DeviceStream&  deviceStream  = testDevice->deviceStream();
-        setActiveDevice(testDevice->deviceInfo());
-
-        DeviceBuffer<TypeParam> buffer;
-        int                     numValues = 123;
-        allocateDeviceBuffer(&buffer, 2 * numValues, deviceContext);
-        std::vector<TypeParam> valuesIn(numValues);
-        std::vector<TypeParam> valuesOut(2 * numValues);
-
-        std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
-
-        // Fill the buffer with two copies of valuesIn, one after the other.
-        copyToDeviceBuffer(
-                &buffer, valuesIn.data(), 0, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-        copyToDeviceBuffer(
-                &buffer, valuesIn.data(), numValues, numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-        // Do the same copying on the CPU, so we can test it works
-        // correctly.
-        valuesIn.insert(valuesIn.end(), valuesIn.begin(), valuesIn.end());
-
-        copyFromDeviceBuffer(
-                valuesOut.data(), &buffer, 0, 2 * numValues, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-        EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) << "Changed after H2D and D2H copy.";
-
-        SCOPED_TRACE("Checking the copy respects the output range");
-
-        // Remove the first element, and push another copy of the last
-        // element, so we can check that a copy of all of the data
-        // skipping the first element correctly over-writes exactly
-        // all but one of the old values.
-        valuesIn.erase(valuesIn.begin());
-        valuesIn.push_back(valuesIn.back());
-        copyFromDeviceBuffer(
-                valuesOut.data(), &buffer, 1, 2 * numValues - 1, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-        EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn)) << "Changed after H2D and D2H copy.";
+        PinningPolicy pinningPolicy = (transferKind == GpuApiCallBehavior::Async)
+                                              ? PinningPolicy::PinnedIfSupported
+                                              : PinningPolicy::CannotBePinned;
+        for (const auto& testDevice : getTestHardwareEnvironment()->getTestDeviceList())
+        {
+            const DeviceContext& deviceContext = testDevice->deviceContext();
+            const DeviceStream&  deviceStream  = testDevice->deviceStream();
+            setActiveDevice(testDevice->deviceInfo());
+
+            DeviceBuffer<TypeParam> buffer;
+            int                     numValues = 123;
+            allocateDeviceBuffer(&buffer, 2 * numValues, deviceContext);
+            HostVector<TypeParam> valuesIn(numValues, { pinningPolicy });
+            HostVector<TypeParam> valuesOut(2 * numValues, { pinningPolicy });
+
+            std::iota(valuesIn.begin(), valuesIn.end(), c_initialValue<TypeParam>);
+
+            // Fill the buffer with two copies of valuesIn, one after the other.
+            copyToDeviceBuffer(&buffer, valuesIn.data(), 0, numValues, deviceStream, transferKind, nullptr);
+            copyToDeviceBuffer(
+                    &buffer, valuesIn.data(), numValues, numValues, deviceStream, transferKind, nullptr);
+            // Wait until GPU is done and do the same copying on the CPU, so we can test it works correctly.
+            if (transferKind == GpuApiCallBehavior::Async)
+            {
+                deviceStream.synchronize();
+            }
+            valuesIn.insert(valuesIn.end(), valuesIn.begin(), valuesIn.end());
+
+            copyFromDeviceBuffer(
+                    valuesOut.data(), &buffer, 0, 2 * numValues, deviceStream, transferKind, nullptr);
+            if (transferKind == GpuApiCallBehavior::Async)
+            {
+                deviceStream.synchronize();
+            }
+            EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
+                    << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy.";
+
+            SCOPED_TRACE("Checking the copy respects the output range");
+
+            // Remove the first element, and push another copy of the last
+            // element, so we can check that a copy of all of the data
+            // skipping the first element correctly over-writes exactly
+            // all but one of the old values.
+            valuesIn.erase(valuesIn.begin());
+            valuesIn.push_back(valuesIn.back());
+            copyFromDeviceBuffer(
+                    valuesOut.data(), &buffer, 1, 2 * numValues - 1, deviceStream, transferKind, nullptr);
+            if (transferKind == GpuApiCallBehavior::Async)
+            {
+                deviceStream.synchronize();
+            }
+            EXPECT_THAT(valuesOut, Pointwise(Eq(), valuesIn))
+                    << "Changed after H2D and D2H " << enumValueToString(transferKind) << " copy.";
+        }
     }
 }
 
-
 } // namespace
 } // namespace test
 } // namespace gmx