Support pinning in HostAllocator

[alexxy/gromacs.git] / src / gromacs / gpu_utils / tests / hostallocator.cpp
diff --git a/src/gromacs/gpu_utils/tests/hostallocator.cpp b/src/gromacs/gpu_utils/tests/hostallocator.cpp

index 689987bec79b65217b228af613bbfe8642628c1c..809cce2b5a593292c82126e9493dc83f9943de32 100644 (file)
--- a/src/gromacs/gpu_utils/tests/hostallocator.cpp
+++ b/src/gromacs/gpu_utils/tests/hostallocator.cpp
@@ -42,12 +42,16 @@
  
  #include "gromacs/gpu_utils/hostallocator.h"
  
+#include "config.h"
+
  #include <type_traits>
  #include <vector>
  
  #include <gtest/gtest.h>
  
+#include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/math/vectypes.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/real.h"
  
  #include "devicetransfers.h"
@@ -59,25 +63,20 @@ namespace gmx
  namespace
  {
  
-//! The types used in testing.
-typedef ::testing::Types<int, real, RVec> TestTypes;
-
-//! Typed test fixture
+/*! \internal \brief Typed test fixture for infrastructure for
+ * host-side memory used for GPU transfers. */
  template <typename T>
-class HostAllocatorTest : public test::GpuTest
+class HostMemoryTest : public test::GpuTest
  {
      public:
          //! Convenience type
          using ValueType = T;
          //! Convenience type
-        using AllocatorType = HostAllocator<T>;
-        //! Convenience type
-        using VectorType = std::vector<ValueType, AllocatorType>;
-        //! Convenience type
          using ViewType = ArrayRef<ValueType>;
          //! Convenience type
          using ConstViewType = ArrayRef<const ValueType>;
          //! Prepare contents of a VectorType.
+        template <typename VectorType>
          void fillInput(VectorType *input) const;
          //! Compares input and output vectors.
          void compareVectors(ConstViewType input,
@@ -87,25 +86,30 @@ class HostAllocatorTest : public test::GpuTest
  };
  
  // Already documented
-template <typename T>
-void HostAllocatorTest<T>::fillInput(VectorType *input) const
+template <typename T> template <typename VectorType>
+void HostMemoryTest<T>::fillInput(VectorType *input) const
  {
-    input->push_back(1);
-    input->push_back(2);
-    input->push_back(3);
+    input->resize(3);
+    (*input)[0] = 1;
+    (*input)[1] = 2;
+    (*input)[2] = 3;
  }
  
  //! Initialization specialization for RVec
-template <>
-void HostAllocatorTest<RVec>::fillInput(VectorType *input) const
+template <> template <typename VectorType>
+void HostMemoryTest<RVec>::fillInput(VectorType *input) const
  {
-    input->push_back({1, 2, 3});
+    input->reserve(3);
+    input->resize(3);
+    (*input)[0] = {1, 2, 3};
+    (*input)[1] = {4, 5, 6};
+    (*input)[2] = {7, 8, 9};
  }
  
  // Already documented
  template <typename T>
-void HostAllocatorTest<T>::compareVectors(ConstViewType input,
-                                          ConstViewType output) const
+void HostMemoryTest<T>::compareVectors(ConstViewType input,
+                                       ConstViewType output) const
  {
      for (size_t i = 0; i != input.size(); ++i)
      {
@@ -115,8 +119,8 @@ void HostAllocatorTest<T>::compareVectors(ConstViewType input,
  
  //! Comparison specialization for RVec
  template <>
-void HostAllocatorTest<RVec>::compareVectors(ConstViewType input,
-                                             ConstViewType output) const
+void HostMemoryTest<RVec>::compareVectors(ConstViewType input,
+                                          ConstViewType output) const
  {
      for (size_t i = 0; i != input.size(); ++i)
      {
@@ -146,15 +150,8 @@ ArrayRef<char> charArrayRefFromArray(T *data, size_t size)
  }
  
  template <typename T>
-void HostAllocatorTest<T>::runTest(ConstViewType input, ViewType output) const
+void HostMemoryTest<T>::runTest(ConstViewType input, ViewType output) const
  {
-    // We can't do a test that does a transfer unless we have a
-    // compatible device.
-    if (!this->haveValidGpus())
-    {
-        return;
-    }
-
      // Convert the views of input and output to flat non-const chars,
      // so that there's no templating when we call doDeviceTransfers.
      auto inputRef  = charArrayRefFromArray(input.data(), input.size());
@@ -164,6 +161,22 @@ void HostAllocatorTest<T>::runTest(ConstViewType input, ViewType output) const
      this->compareVectors(input, output);
  }
  
+//! The types used in testing.
+typedef ::testing::Types<int, real, RVec> TestTypes;
+
+//! Typed test fixture
+template <typename T>
+class HostAllocatorTest : public HostMemoryTest<T>
+{
+    public:
+        //! Convenience type
+        using ValueType = T;
+        //! Convenience type
+        using AllocatorType = HostAllocator<T>;
+        //! Convenience type
+        using VectorType = std::vector<ValueType, AllocatorType>;
+};
+
  TYPED_TEST_CASE(HostAllocatorTest, TestTypes);
  
  // Note that in GoogleTest typed tests, the use of TestFixture:: and
@@ -178,48 +191,132 @@ TYPED_TEST(HostAllocatorTest, EmptyMemoryAlwaysWorks)
      typename TestFixture::VectorType v;
  }
  
-TYPED_TEST(HostAllocatorTest, TransfersUsingDefaultHostAllocatorWork)
+TYPED_TEST(HostAllocatorTest, VectorsWithDefaultHostAllocatorAlwaysWorks)
  {
      typename TestFixture::VectorType input = {{1, 2, 3}}, output;
      output.resize(input.size());
+}
+
+// Several tests actually do CUDA transfers. This is not strictly
+// necessary, because the state of page alignment or pinning does not
+// currently affect the success of a CUDA transfer. CUDA checks happen
+// only during cudaHostRegister and cudaHostUnregister. Such tests
+// will become valuable only if this behaviour ever changes.
+
+TYPED_TEST(HostAllocatorTest, TransfersWithoutPinningWork)
+{
+    typename TestFixture::VectorType input;
+    this->fillInput(&input);
+    typename TestFixture::VectorType output;
+    output.resize(input.size());
  
      this->runTest(input, output);
  }
  
-TYPED_TEST(HostAllocatorTest, TransfersUsingNormalCpuHostAllocatorWork)
+TYPED_TEST(HostAllocatorTest, FillInputAlsoWorksAfterCallingReserve)
  {
-    // Make an allocator with a 'normal CPU' allocation policy. This
-    // might be slower than another policy, but still works.
-    using AllocatorType       = typename TestFixture::AllocatorType;
-    using AllocatorPolicyType = typename AllocatorType::allocation_policy;
-    AllocatorPolicyType              policy(AllocatorPolicyType::Impl::AllocateAligned);
-    AllocatorType                    allocator(policy);
+    typename TestFixture::VectorType input;
+    input.reserve(3);
+    this->fillInput(&input);
+}
+
+#if GMX_GPU == GMX_GPU_CUDA
  
-    typename TestFixture::VectorType input(allocator);
+// Policy suitable for pinning is only supported for a CUDA build
+
+TYPED_TEST(HostAllocatorTest, TransfersWithPinningWorkWithCuda)
+{
+    typename TestFixture::VectorType input;
+    changePinningPolicy(&input, PinningPolicy::CanBePinned);
      this->fillInput(&input);
-    typename TestFixture::VectorType output(allocator);
+    typename TestFixture::VectorType output;
+    changePinningPolicy(&output, PinningPolicy::CanBePinned);
      output.resize(input.size());
  
      this->runTest(input, output);
  }
  
-TYPED_TEST(HostAllocatorTest, TransfersUsingGpuHostAllocatorWork)
+//! Helper function for wrapping a call to isHostMemoryPinned.
+template <typename VectorType>
+bool isPinned(const VectorType &v)
  {
-    // Make an allocator with a 'for GPU' allocation policy. This
-    // should be more efficient, but we can't test that.
-    using AllocatorType       = typename TestFixture::AllocatorType;
-    using AllocatorPolicyType = typename AllocatorType::allocation_policy;
-    AllocatorPolicyType              policy(AllocatorPolicyType::Impl::AllocateForGpu);
-    AllocatorType                    allocator(policy);
+    void *data = const_cast<void *>(static_cast<const void *>(v.data()));
+    return isHostMemoryPinned(data);
+}
+
+TYPED_TEST(HostAllocatorTest, ManualPinningOperationsWorkWithCuda)
+{
+    typename TestFixture::VectorType input;
+    changePinningPolicy(&input, PinningPolicy::CanBePinned);
+    EXPECT_FALSE(isPinned(input));
+
+    // Unpin before allocation is fine, but does nothing.
+    input.get_allocator().getPolicy().unpin();
+    EXPECT_FALSE(isPinned(input));
  
-    typename TestFixture::VectorType input(allocator);
+    // Pin with no contents is fine, but does nothing.
+    input.get_allocator().getPolicy().pin();
+    EXPECT_FALSE(isPinned(input));
+
+    // Fill some contents, which will be pinned because of the policy.
      this->fillInput(&input);
-    typename TestFixture::VectorType output(allocator);
-    output.resize(input.size());
+    EXPECT_TRUE(isPinned(input));
  
-    this->runTest(input, output);
+    // Unpin after pin is fine.
+    input.get_allocator().getPolicy().unpin();
+    EXPECT_FALSE(isPinned(input));
+
+    // Repeated unpin should be a no-op.
+    input.get_allocator().getPolicy().unpin();
+
+    // Pin after unpin is fine.
+    input.get_allocator().getPolicy().pin();
+    EXPECT_TRUE(isPinned(input));
+
+    // Repeated pin should be a no-op, and still pinned.
+    input.get_allocator().getPolicy().pin();
+    EXPECT_TRUE(isPinned(input));
+
+    // Switching policy to CannotBePinned must unpin the buffer (via
+    // realloc and copy).
+    auto oldInputData = input.data();
+    changePinningPolicy(&input, PinningPolicy::CannotBePinned);
+    EXPECT_FALSE(isPinned(input));
+    // The old and new addresses cannot be equal, as both buffers had to
+    // be allocated at the same time for the contents to be copied.
+    EXPECT_NE(oldInputData, input.data());
+
+    // Switching policy to CanBePinned must pin the buffer (via
+    // realloc and copy).
+    oldInputData = input.data();
+    changePinningPolicy(&input, PinningPolicy::CanBePinned);
+    EXPECT_TRUE(isPinned(input));
+    // The old and new addresses cannot be equal, as both buffers had to
+    // be allocated at the same time for the contents to be copied.
+    EXPECT_NE(oldInputData, input.data());
+}
+
+#else
+
+TYPED_TEST(HostAllocatorTest, ChangingPinningPolicyRequiresCuda)
+{
+    typename TestFixture::VectorType input;
+    EXPECT_DEATH(changePinningPolicy(&input, PinningPolicy::CanBePinned),
+                 ".*A suitable build of GROMACS.* is required.*");
  }
  
+TYPED_TEST(HostAllocatorTest, ManualPinningOperationsWorkEvenWithoutCuda)
+{
+    typename TestFixture::VectorType input;
+
+    // Since the buffer can't be pinned and isn't pinned, and the
+    // calling code can't be unhappy about this, these are OK.
+    input.get_allocator().getPolicy().pin();
+    input.get_allocator().getPolicy().unpin();
+}
+
+#endif
+
  TYPED_TEST(HostAllocatorTest, StatefulAllocatorUsesMemory)
  {
      // The HostAllocator has state, so a container using it will be
@@ -229,5 +326,16 @@ TYPED_TEST(HostAllocatorTest, StatefulAllocatorUsesMemory)
                sizeof(typename TestFixture::VectorType));
  }
  
+//! Declare allocator types to test.
+using AllocatorTypesToTest = ::testing::Types<HostAllocator<real>,
+                                              HostAllocator<int>,
+                                              HostAllocator<RVec>
+                                              >;
+
+TYPED_TEST_CASE(AllocatorTest, AllocatorTypesToTest);
+
  } // namespace
  } // namespace
+
+// Includes tests common to all allocation policies.
+#include "gromacs/utility/tests/alignedallocator-impl.h"