Support pinning in HostAllocator

author Mark Abraham <mark.j.abraham@gmail.com>

Sat, 11 Nov 2017 19:50:20 +0000 (12:50 -0700)

committer Mark Abraham <mark.j.abraham@gmail.com>

Tue, 21 Nov 2017 06:34:56 +0000 (07:34 +0100)
author Mark Abraham <mark.j.abraham@gmail.com>
Sat, 11 Nov 2017 19:50:20 +0000 (12:50 -0700)
committer Mark Abraham <mark.j.abraham@gmail.com>
Tue, 21 Nov 2017 06:34:56 +0000 (07:34 +0100)
diff --git a/src/gromacs/gpu_utils/CMakeLists.txt b/src/gromacs/gpu_utils/CMakeLists.txt

index 27a190cf7a9e9b52bd70c6c553cfd876b7f67eb9..04e9b752a1c78496aebc9eab034d55de3ac6c3f6 100644 (file)
--- a/src/gromacs/gpu_utils/CMakeLists.txt
+++ b/src/gromacs/gpu_utils/CMakeLists.txt
@@ -32,10 +32,12 @@
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
  # To help us fund GROMACS development, we humbly ask that you cite
  # the research papers on the package. Check out http://www.gromacs.org.
  
+gmx_add_libgromacs_sources(
+        hostallocator.cpp
+        )
  if(GMX_USE_OPENCL)
      gmx_add_libgromacs_sources(
          gpu_utils_ocl.cpp
  if(GMX_USE_OPENCL)
      gmx_add_libgromacs_sources(
          gpu_utils_ocl.cpp
-        hostallocator.cpp
          ocl_compiler.cpp
          ocl_caching.cpp
          oclutils.cpp
          ocl_compiler.cpp
          ocl_caching.cpp
          oclutils.cpp
@@ -43,14 +45,13 @@ if(GMX_USE_OPENCL)
  elseif(GMX_USE_CUDA)
      gmx_add_libgromacs_sources(
          cudautils.cu
  elseif(GMX_USE_CUDA)
      gmx_add_libgromacs_sources(
          cudautils.cu
-        hostallocator.cu
          gpu_utils.cu
          gpu_utils.cu
+        pinning.cu
          pmalloc_cuda.cu
          )
  else()
      gmx_add_libgromacs_sources(
          gpu_utils.cpp
          pmalloc_cuda.cu
          )
  else()
      gmx_add_libgromacs_sources(
          gpu_utils.cpp
-        hostallocator.cpp
          )
  endif()
  
          )
  endif()
  
diff --git a/src/gromacs/gpu_utils/cudautils.cuh b/src/gromacs/gpu_utils/cudautils.cuh

index f8d73c96979688d7b56bde47871635e21b1c27f3..dae3548fdd950a8d5bb1dee5bf8392b61ce75df8 100644 (file)
--- a/src/gromacs/gpu_utils/cudautils.cuh
+++ b/src/gromacs/gpu_utils/cudautils.cuh
@@ -42,9 +42,51 @@
  #include <nvml.h>
  #endif /* HAVE_NVML */
  
  #include <nvml.h>
  #endif /* HAVE_NVML */
  
+#include <string>
+
  #include "gromacs/math/vec.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/math/vec.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/utility/fatalerror.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/stringutil.h"
+
+namespace gmx
+{
+namespace
+{
+
+/*! \brief Helper function to ensure no pending error silently
+ * disrupts error handling.
+ *
+ * Asserts in a debug build if an unhandled error is present. Issues a
+ * warning at run time otherwise.
+ *
+ * \todo This is similar to CU_CHECK_PREV_ERR, which should be
+ * consolidated.
+ */
+static inline void ensureNoPendingCudaError(const char *errorMessage)
+{
+    // Ensure there is no pending error that would otherwise affect
+    // the behaviour of future error handling.
+    cudaError_t stat = cudaGetLastError();
+    if (stat == cudaSuccess)
+    {
+        return;
+    }
+
+    // If we would find an error in a release build, we do not know
+    // what is appropriate to do about it, so assert only for debug
+    // builds.
+    auto fullMessage = formatString("%s An unhandled error from a previous CUDA operation was detected. %s: %s",
+                                    errorMessage, cudaGetErrorName(stat), cudaGetErrorString(stat));
+    GMX_ASSERT(stat == cudaSuccess, fullMessage.c_str());
+    // TODO When we evolve a better logging framework, use that
+    // for release-build error reporting.
+    gmx_warning(fullMessage.c_str());
+}
+
+}   // namespace
+}   // namespace
  
  enum class GpuApiCallBehavior;
  
  
  enum class GpuApiCallBehavior;
  
diff --git a/src/gromacs/gpu_utils/hostallocator.cpp b/src/gromacs/gpu_utils/hostallocator.cpp

index 3ceaefeb7bbbe1b47f5444b53fb32753610fc598..a23c0cb12c83321e75844f5af85512ebab50267b 100644 (file)
--- a/src/gromacs/gpu_utils/hostallocator.cpp
+++ b/src/gromacs/gpu_utils/hostallocator.cpp
@@ -34,12 +34,7 @@
   */
  /*! \internal \file
   * \brief Implements gmx::HostAllocationPolicy for allocating memory
   */
  /*! \internal \file
   * \brief Implements gmx::HostAllocationPolicy for allocating memory
- * suitable for GPU transfers on OpenCL, and when no GPU
- * implementation is used.
- *
- * \todo The same implementation can be used because we do not
- * currently attempt to optimize the allocation of host-side buffers
- * for OpenCL transfers, but this might be good to do.
+ * suitable for e.g. GPU transfers on CUDA.
   *
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   */
   *
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   */
@@ -47,39 +42,149 @@
  
  #include "hostallocator.h"
  
  
  #include "hostallocator.h"
  
-#include <cstdlib>
+#include "config.h"
+
+#include <cstddef>
+
+#include <memory>
  
  #include "gromacs/utility/alignedallocator.h"
  
  #include "gromacs/utility/alignedallocator.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/stringutil.h"
+
+#include "pinning.h"
  
  namespace gmx
  {
  
  
  namespace gmx
  {
  
-HostAllocationPolicy::HostAllocationPolicy(Impl s) : allocateForGpu_(s) {}
+//! Private implementation class.
+class HostAllocationPolicy::Impl
+{
+    public:
+        /*! \brief Pointer to the last unfreed allocation, or nullptr
+         * if no allocation exists.
+         *
+         * Note that during e.g. std::vector.resize() a call to its
+         * allocator's allocate() function precedes the call to its
+         * allocator's deallocate() function for freeing the old
+         * buffer after the data has been copied from it. So in
+         * general, pointer_ will not match the argument received by
+         * free(). */
+        void         *pointer_ = nullptr;
+        //! Number of bytes in the last unfreed allocation.
+        std::size_t   numBytes_ = 0;
+        //! The pointer to any storage that has been pinned, or nullptr if none has been pinned.
+        void         *pinnedPointer_ = nullptr;
+        //! Whether this object is in mode where new allocations will be pinned by default.
+        PinningPolicy pinningPolicy_ = PinningPolicy::CannotBePinned;
+};
  
  
-void *
-HostAllocationPolicy::malloc(std::size_t bytes) const
+HostAllocationPolicy::HostAllocationPolicy() : impl_(std::make_shared<Impl>())
  {
  {
-    GMX_UNUSED_VALUE(allocateForGpu_);
-    // TODO if/when this is properly supported for OpenCL, we
-    // should explore whether it is needed, and if so what
-    // page size is desirable for alignment.
-    return AlignedAllocationPolicy::malloc(bytes);
  }
  
  }
  
-void
-HostAllocationPolicy::free(void *buffer) const
+std::size_t HostAllocationPolicy::alignment()
  {
  {
+    return (impl_->pinningPolicy_ == PinningPolicy::CanBePinned ?
+            PageAlignedAllocationPolicy::alignment() :
+            AlignedAllocationPolicy::alignment());
+}
+void *HostAllocationPolicy::malloc(std::size_t bytes) const noexcept
+{
+    // A container could have a pinned allocation that is being
+    // extended, in which case we must un-pin while we still know the
+    // old pinned vector, and which also ensures we don't pin two
+    // buffers at the same time. If there's no allocation, or it isn't
+    // pinned, then attempting to unpin it is OK, too.
+    unpin();
+    impl_->pointer_ = (impl_->pinningPolicy_ == PinningPolicy::CanBePinned ?
+                       PageAlignedAllocationPolicy::malloc(bytes) :
+                       AlignedAllocationPolicy::malloc(bytes));
+
+    if (impl_->pointer_ != nullptr)
+    {
+        impl_->numBytes_ = bytes;
+    }
+    pin();
+    return impl_->pointer_;
+}
+
+void HostAllocationPolicy::free(void *buffer) const noexcept
+{
+    unpin();
      if (buffer == nullptr)
      {
      if (buffer == nullptr)
      {
+        // Nothing to do
          return;
      }
          return;
      }
-    GMX_UNUSED_VALUE(allocateForGpu_);
-    AlignedAllocationPolicy::free(buffer);
+    if (impl_->pinningPolicy_ == PinningPolicy::CanBePinned)
+    {
+        PageAlignedAllocationPolicy::free(buffer);
+    }
+    else
+    {
+        AlignedAllocationPolicy::free(buffer);
+    }
+    impl_->pointer_  = nullptr;
+    impl_->numBytes_ = 0;
  }
  
  }
  
-HostAllocationPolicy makeHostAllocationPolicyForGpu()
+PinningPolicy HostAllocationPolicy::pinningPolicy() const
  {
  {
-    return HostAllocationPolicy(HostAllocationPolicy::Impl::AllocateForGpu);
+    return impl_->pinningPolicy_;
+}
+
+void HostAllocationPolicy::setPinningPolicy(PinningPolicy pinningPolicy)
+{
+    if (GMX_GPU != GMX_GPU_CUDA)
+    {
+        GMX_RELEASE_ASSERT(pinningPolicy == PinningPolicy::CannotBePinned,
+                           "A suitable build of GROMACS (e.g. with CUDA) is required for a "
+                           "HostAllocationPolicy to be set to a mode that produces pinning.");
+    }
+    impl_->pinningPolicy_ = pinningPolicy;
+}
+
+void HostAllocationPolicy::pin() const noexcept
+{
+    if (impl_->pinningPolicy_ == PinningPolicy::CannotBePinned ||
+        impl_->pointer_ == nullptr ||
+        impl_->pinnedPointer_ != nullptr)
+    {
+        // Do nothing if we're not in pinning mode, or the allocation
+        // is empty, or it is already pinned.
+        return;
+    }
+#if GMX_GPU == GMX_GPU_CUDA
+    pinBuffer(impl_->pointer_, impl_->numBytes_);
+    impl_->pinnedPointer_ = impl_->pointer_;
+#else
+    const char *errorMessage = "Could not register the host memory for pinning.";
+
+    GMX_RELEASE_ASSERT(impl_->pinningPolicy_ == PinningPolicy::CannotBePinned,
+                       formatString("%s This build configuration must only have pinning policy "
+                                    "that leads to no pinning.", errorMessage).c_str());
+#endif
+}
+
+void HostAllocationPolicy::unpin() const noexcept
+{
+    if (impl_->pinnedPointer_ == nullptr)
+    {
+        return;
+    }
+
+#if GMX_GPU == GMX_GPU_CUDA
+    // Note that if the caller deactivated pinning mode, we still want
+    // to be able to unpin if the allocation is still pinned.
+
+    unpinBuffer(impl_->pointer_);
+    impl_->pinnedPointer_ = nullptr;
+#else
+    GMX_RELEASE_ASSERT(impl_->pinnedPointer_ == nullptr,
+                       "Since the build configuration does not support pinning, then "
+                       "the pinned pointer must be nullptr.");
+#endif
  }
  
  } // namespace gmx
  }
  
  } // namespace gmx
diff --git a/src/gromacs/gpu_utils/hostallocator.h b/src/gromacs/gpu_utils/hostallocator.h

index dae54f2144be7f2adf9867a1bcafcc722661acf9..a34c26ed2b712ce9364418069f713bd3e2e64236 100644 (file)
--- a/src/gromacs/gpu_utils/hostallocator.h
+++ b/src/gromacs/gpu_utils/hostallocator.h
@@ -33,9 +33,11 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \libinternal \file
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \libinternal \file
- * \brief Declares gmx::HostAllocationPolicy and gmx::HostAllocator,
- * which are used to make standard library containers that can
- * allocate memory suitable for GPU transfers.
+ * \brief Declares gmx::HostAllocationPolicy, gmx::HostAllocator, and
+ * gmx::HostVector, which are used to make/be standard library
+ * containers that can allocate memory suitable for transfers.
+ * Currently the only supported transfers using pinned memory are
+ * to CUDA GPUs, but other possibilities exist in future.
   *
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   * \inlibraryapi
   *
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   * \inlibraryapi
@@ -45,57 +47,115 @@
  
  #include <cstddef>
  
  
  #include <cstddef>
  
-#include "gromacs/utility/allocator.h"
+#include <memory>
+#include <vector>
+
+#include "gromacs/utility/alignedallocator.h"
+#include "gromacs/utility/exceptions.h"
  
  namespace gmx
  {
  
  
  namespace gmx
  {
  
+/*! \brief Helper enum for pinning policy of the allocation of
+ * HostAllocationPolicy.
+ *
+ * For an efficient non-blocking transfer (e.g. to a GPU), the memory
+ * pages for a buffer need to be pinned to a physical page. Aligning
+ * such buffers to a physical page should miminize the number of pages
+ * that need to be pinned. However, some buffers that may be used for
+ * such transfers may also be used in either GROMACS builds or run
+ * paths that cannot use such a device, so the policy can be
+ * configured so that the resource consumption is no higher than
+ * required for correct, efficient operation in all cases. */
+enum class PinningPolicy : int
+{
+    CannotBePinned,     // Memory is not known to be suitable for pinning.
+    CanBePinned,        // Memory is suitable for efficient pinning, e.g. because it is
+                        // allocated to be page aligned, and will be pinned when non-empty.
+};
+
+//! Forward declaration of host allocation policy class.
+class HostAllocationPolicy;
+
+/*! \brief Memory allocator that uses HostAllocationPolicy.
+ *
+ *  \tparam T          Type of objects to allocate
+ *
+ * This convenience partial specialization can be used for the
+ * optional allocator template parameter in standard library
+ * containers whose memory may be used for e.g. GPU transfers. The
+ * memory will always be allocated according to the behavior of
+ * HostAllocationPolicy.
+ */
+template <class T>
+using HostAllocator = Allocator<T, HostAllocationPolicy>;
+
+//! Convenience alias for std::vector that uses HostAllocator.
+template <class T>
+using HostVector = std::vector<T, HostAllocator<T> >;
+
  /*! \libinternal
   * \brief Policy class for configuring gmx::Allocator, to manage
  /*! \libinternal
   * \brief Policy class for configuring gmx::Allocator, to manage
- * allocations of memory that is suitable for GPU transfers.
+ * allocations of memory that may be needed for e.g. GPU transfers.
   *
   * This allocator has state, so is most useful in cases where it is
   * not known at compile time whether the allocated memory will be
   *
   * This allocator has state, so is most useful in cases where it is
   * not known at compile time whether the allocated memory will be
- * transferred to a GPU. It will increase the size of containers that
- * use it. Memory allocated will always be aligned by the GPU
- * framework, or by AlignedAllocationPolicy.
+ * transferred to some device. It will increase the size of containers
+ * that use it. If the GROMACS build is configured with CUDA support,
+ * then memory will be allocated with PageAlignedAllocator, and that
+ * page pinned to physical memory if the pinning mode has been
+ * activated. If pinning mode is deactivated, or the GROMACS build
+ * does not support CUDA, then the memory will be allocated with
+ * AlignedAllocator. The pin() and unpin() methods work with the CUDA
+ * build, and silently do nothing otherwise. In future, we may modify
+ * or generalize this to work differently in other cases.
   *
   *
- * \todo Consider also having a stateless version of this policy,
- * which might be slightly faster or more convenient to use in the
- * cases where it is known at compile time that the allocation will be
- * used to transfer to a GPU.
+ * The intended use is to configure gmx::Allocator with this class as
+ * its policy class, and then to use e.g.
+ * std::vector::get_allocator().getPolicy() to control whether the
+ * allocation policy should activate its pinning mode. The policy
+ * object can also be used to explicitly pin() and unpin() the buffer
+ * when it is using PinningPolicy::CanBePinned. The policy object is
+ * returned by value (as required by the C++ standard for
+ * get_allocator(), which copies a std::shared_ptr, so the policy
+ * object should be retrieved sparingly, e.g. only upon resize of the
+ * allocation. (Normal operation of the vector, e.g. during resize,
+ * incurs only the cost of the pointer indirection needed to consult
+ * the current state of the allocation policy.)
+ *
+ * \todo As a minor optimization, consider also having a stateless
+ * version of this policy, which might be slightly faster or more
+ * convenient to use in the cases where it is known at compile time
+ * that the allocation will be used to transfer to a GPU.
   */
  class HostAllocationPolicy
  {
      public:
   */
  class HostAllocationPolicy
  {
      public:
-        //! Helper construction enum
-        enum class Impl : int
-        {
-            AllocateAligned  = 0,
-            AllocateForGpu   = 1
-        };
-        //! Constructor.
-        explicit HostAllocationPolicy(Impl s = Impl::AllocateAligned);
-        /*! \brief Allocate GPU memory
-         *
-         *  \param bytes Amount of memory (bytes) to allocate. It is
-         *               valid to ask for 0 bytes, which will return a
-         *               non-null pointer that is properly aligned in
-         *               page-locked memory (but that you should not
-         *               use). TODO check this.
-         *
-         * \return Valid pointer if the allocation worked, otherwise nullptr.
-         *
-         * The memory will always be allocated according to the requirements
-         * of the acceleration platform in use (e.g. CUDA).
+        //! Default constructor.
+        HostAllocationPolicy();
+        /*! \brief Return the alignment size currently used by the active pinning policy. */
+        std::size_t alignment();
+        /*! \brief Allocate and perhaps pin page-aligned memory suitable for
+         * e.g. GPU transfers.
+         *
+         * Before attempting to allocate, unpin() is called. After a
+         * successful allocation, pin() is called. (Whether these do
+         * things depends on the PinningPolicy that is in effect.)
+         *
+         *  \param bytes Amount of memory (bytes) to allocate. It is valid to ask for
+         *               0 bytes, which will return a non-null pointer that is properly
+         *               aligned and padded (but that you should not use).
+         *
+         *  \return Valid pointer if the allocation+optional pinning worked, otherwise nullptr.
           *
           *  \note Memory allocated with this routine must be released
           *        with gmx::HostAllocationPolicy::free(), and
           *        absolutely not the system free().
           *
           *  \note Memory allocated with this routine must be released
           *        with gmx::HostAllocationPolicy::free(), and
           *        absolutely not the system free().
+         *
+         * Does not throw.
           */
           */
-        void *
-        malloc(std::size_t bytes) const;
-        /*! \brief Free GPU memory
+        void *malloc(std::size_t bytes) const noexcept;
+        /*! \brief Free the memory, after unpinning (if appropriate).
           *
           *  \param buffer  Memory pointer previously returned from gmx::HostAllocationPolicy::malloc()
           *
           *
           *  \param buffer  Memory pointer previously returned from gmx::HostAllocationPolicy::malloc()
           *
@@ -103,36 +163,102 @@ class HostAllocationPolicy
           *        obtained from gmx:HostAllocationPolicy::malloc(),
           *        and absolutely not any pointers obtained the system
           *        malloc().
           *        obtained from gmx:HostAllocationPolicy::malloc(),
           *        and absolutely not any pointers obtained the system
           *        malloc().
+         *
+         * Does not throw.
           */
           */
-        void
-        free(void *buffer) const;
+        void free(void *buffer) const noexcept;
+        /*! \brief Pin the allocation to physical memory, if appropriate.
+         *
+         * If the allocation policy is not in pinning mode, or the
+         * allocation is empty, ot the allocation is already pinned,
+         * then do nothing.
+         *
+         * Does not throw.
+         */
+        void pin() const noexcept;
+        /*! \brief Unpin the allocation, if appropriate.
+         *
+         * Regardless of the allocation policy, unpin the memory if
+         * previously pinned, otherwise do nothing.
+         *
+         * Does not throw.
+         */
+        void unpin() const noexcept;
+        /*! \brief Return the current pinning policy (which is semi-independent
+         * of whether the buffer is actually pinned).
+         *
+         * Does not throw.
+         */
+        PinningPolicy pinningPolicy() const;
+        //! Specify an allocator trait so that the stateful allocator should propagate.
+        using propagate_on_container_copy_assignment = std::true_type;
+        //! Specify an allocator trait so that the stateful allocator should propagate.
+        using propagate_on_container_move_assignment = std::true_type;
+        //! Specify an allocator trait so that the stateful allocator should propagate.
+        using propagate_on_container_swap = std::true_type;
      private:
      private:
+        /*! \brief Set the current pinning policy.
+         *
+         * Does not pin any current buffer. Use changePinningPolicy to
+         * orchestrate the necessary unpin, allocate, copy, pin for
+         * effectively changing the pinning policy of a HostVector.
+         *
+         * Does not throw.
+         */
+        // cppcheck-suppress unusedPrivateFunction
+        void setPinningPolicy(PinningPolicy pinningPolicy);
+        /*! \brief Declare as a friend function the only supported way
+         * to change the pinning policy.
+         *
+         * When the pinning policy changes, we want the state of the
+         * allocation to match the new policy. However, that requires
+         * a copy and swap of the buffers, which can only take place
+         * at the level of the container. So we wrap the required
+         * operations in a helper friend function.
+         *
+         * Of course, if there is no allocation because the vector is
+         * empty, then nothing will change. */
+        template <class T> friend
+        void changePinningPolicy(HostVector<T> *v, PinningPolicy pinningPolicy);
+        //! Private implementation class.
+        class Impl;
          /*! \brief State of the allocator.
           *
          /*! \brief State of the allocator.
           *
-         * This could change through assignment of one policy to
-         * another, so isn't const. */
-        Impl allocateForGpu_;
+         * This could change through move- or copy-assignment of one
+         * policy to another, so isn't const. */
+        std::shared_ptr<Impl> impl_;
  };
  
  };
  
-/*! \brief Convenience function
- *
- * The default construction is for non-GPU allocation, and this
- * function makes it less verbose to get allocation intended for use
- * with a GPU. */
-HostAllocationPolicy makeHostAllocationPolicyForGpu();
-
-/*! \brief Memory allocator for host-side memory for GPU transfers.
+/*! \brief Helper function for changing the pinning policy of a HostVector.
   *
   *
- *  \tparam T          Type of objects to allocate
- *
- * This convenience partial specialization can be used for the
- * optional allocator template parameter in standard library
- * containers whose memory will be used for GPU transfers. The memory
- * will always be allocated according to the behavior of
- * HostAllocationPolicy.
- */
+ * If the vector has contents, then a full reallocation and buffer
+ * copy are needed if the policy change requires tighter restrictions,
+ * and desirable even if the policy change requires looser
+ * restrictions. That cost is OK, because GROMACS will do this
+ * operation very rarely (e.g. when auto-tuning and deciding to switch
+ * whether a task will run on a GPU, or not). */
  template <class T>
  template <class T>
-using HostAllocator = Allocator<T, HostAllocationPolicy>;
+void changePinningPolicy(HostVector<T> *v, PinningPolicy pinningPolicy)
+{
+    // Do we have anything to do?
+    HostAllocationPolicy vAllocationPolicy = v->get_allocator().getPolicy();
+    if (pinningPolicy == vAllocationPolicy.pinningPolicy())
+    {
+        return;
+    }
+    // Make sure we never have two allocated buffers that are both pinned.
+    vAllocationPolicy.unpin();
+
+    // Construct a new vector that has the requested
+    // allocation+pinning policy, to swap into *v. If *v is empty,
+    // then no real work is done.
+    HostAllocator<T> newAllocator;
+    newAllocator.getPolicy().setPinningPolicy(pinningPolicy);
+    HostVector<T>    newV(v->begin(), v->end(), newAllocator);
+    // Replace the contents of *v, including the stateful allocator.
+    v->swap(newV);
+    // The destructor of newV cleans up the memory formerly managed by *v.
+}
  
  }      // namespace gmx
  
  
  }      // namespace gmx
  
diff --git a/src/gromacs/gpu_utils/pinning.cu b/src/gromacs/gpu_utils/pinning.cu

new file mode 100644 (file)

index 0000000..fb95d90
--- /dev/null
+++ b/src/gromacs/gpu_utils/pinning.cu
@@ -0,0 +1,124 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \internal \file
+ * \brief Implements functions for pinning memory to be suitable for
+ * efficient GPU transfers on CUDA.
+ *
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ */
+#include "gmxpre.h"
+
+#include "pinning.h"
+
+#include <cstddef>
+
+#include "gromacs/gpu_utils/cudautils.cuh"
+#include "gromacs/utility/alignedallocator.h"
+#include "gromacs/utility/exceptions.h"
+#include "gromacs/utility/gmxassert.h"
+#include "gromacs/utility/stringutil.h"
+
+namespace gmx
+{
+
+//! Is \c ptr aligned on a boundary that is a multiple of \c bytes.
+static inline bool isAligned(const void *ptr, size_t bytes)
+{
+    return (reinterpret_cast<intptr_t>(ptr) % bytes) == 0;
+}
+
+void pinBuffer(void *pointer, std::size_t numBytes) noexcept
+{
+    const char *errorMessage = "Could not register the host memory for page locking for GPU transfers.";
+
+    GMX_ASSERT(isAligned(pointer, PageAlignedAllocationPolicy::alignment()),
+               formatString("%s Host memory needs to be page aligned.", errorMessage).c_str());
+
+    ensureNoPendingCudaError(errorMessage);
+    cudaError_t stat = cudaHostRegister(pointer, numBytes, cudaHostRegisterDefault);
+
+    // These errors can only arise from a coding error somewhere.
+    GMX_RELEASE_ASSERT(stat != cudaErrorInvalidValue &&
+                       stat != cudaErrorNotSupported &&
+                       stat != cudaErrorHostMemoryAlreadyRegistered,
+                       formatString("%s %s: %s", errorMessage,
+                                    cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+
+    // We always handle the error, but if it's a type we didn't expect
+    // (e.g. because CUDA changes the set of errors it returns) then
+    // we should get a descriptive assertion in Debug mode so we know
+    // to fix our expectations.
+    GMX_ASSERT(stat != cudaErrorMemoryAllocation,
+               formatString("%s %s: %s which was an unexpected error", errorMessage,
+                            cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+
+    // It might be preferable to throw InternalError here, because the
+    // failing condition can only happen when GROMACS is used with a
+    // CUDA API that can return some other error code. But we can't
+    // engineer GROMACS to be forward-compatible with future CUDA
+    // versions, so if this proves to be a problem in practice, then
+    // GROMACS must be patched, or a supported CUDA version used.
+    GMX_RELEASE_ASSERT(stat == cudaSuccess,
+                       formatString("%s %s: %s", errorMessage,
+                                    cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+}
+
+void unpinBuffer(void *pointer) noexcept
+{
+    const char *errorMessage = "Could not unregister pinned host memory used for GPU transfers.";
+
+    GMX_ASSERT(pointer != nullptr,
+               formatString("%s pointer should not be nullptr when pinned.", errorMessage).c_str());
+
+    ensureNoPendingCudaError(errorMessage);
+    cudaError_t stat = cudaHostUnregister(pointer);
+    // These errors can only arise from a coding error somewhere.
+    GMX_RELEASE_ASSERT(stat != cudaErrorInvalidValue && stat != cudaErrorHostMemoryNotRegistered,
+                       formatString("%s %s: %s", errorMessage,
+                                    cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+    // If there's an error whose type we didn't expect (e.g. because a
+    // future CUDA changes the set of errors it returns) then we
+    // should assert, because our code is wrong.
+    //
+    // The approach differs from that in pin() because we might
+    // unpin() from a destructor, in which case any attempt to throw
+    // an uncaught exception would anyway terminate the program. A
+    // release assertion is a better behaviour than that.
+    GMX_RELEASE_ASSERT(stat == cudaSuccess,
+                       formatString("%s %s: %s which was an unexpected error", errorMessage,
+                                    cudaGetErrorName(stat), cudaGetErrorString(stat)).c_str());
+}
+
+} // namespace gmx
diff --git a/src/gromacs/gpu_utils/hostallocator.cu b/src/gromacs/gpu_utils/pinning.h

similarity index 55%

rename from src/gromacs/gpu_utils/hostallocator.cu

rename to src/gromacs/gpu_utils/pinning.h

index bde92207dad442b27c365813825b5ab7fc8f0746..267b4f392bc2da79ad599fe43d1101ae6cb96664 100644 (file)
--- a/src/gromacs/gpu_utils/hostallocator.cu
+++ b/src/gromacs/gpu_utils/pinning.h
@@ -33,69 +33,32 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \internal \file
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \internal \file
- * \brief Implements gmx::HostAllocationPolicy for allocating memory
- * suitable for GPU transfers on CUDA.
+ * \brief Declares functions for pinning memory to be suitable for
+ * efficient GPU transfers on CUDA.
   *
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   */
   *
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   */
-#include "gmxpre.h"
  
  
-#include "hostallocator.h"
-
-#include <cstdlib>
-
-#include "gromacs/utility/alignedallocator.h"
+#include <cstddef>
  
  namespace gmx
  {
  
  
  namespace gmx
  {
  
-HostAllocationPolicy::HostAllocationPolicy(Impl s) : allocateForGpu_(s) {}
-
-void *
-HostAllocationPolicy::malloc(std::size_t bytes) const
-{
-    void *buffer = nullptr;
-    if (allocateForGpu_ == Impl::AllocateForGpu)
-    {
-        if (bytes != 0)
-        {
-            // Alternatively, this could become a pair of
-            // e.g. PageAlignedAllocationPolicy and cudaHostRegister
-            // calls if that is useful for something.
-            cudaError_t stat = cudaMallocHost(&buffer, bytes, cudaHostAllocDefault);
-            // TODO Throw an exception upon failure, particularly
-            // for cudaErrorMemoryAllocation.
-            if (stat != cudaSuccess)
-            {
-                buffer = nullptr;
-            }
-        }
-    }
-    else
-    {
-        buffer = AlignedAllocationPolicy::malloc(bytes);
-    }
-    return buffer;
-}
-
-void
-HostAllocationPolicy::free(void *buffer) const
-{
-    if (buffer == nullptr)
-    {
-        return;
-    }
-    if (allocateForGpu_ == Impl::AllocateForGpu)
-    {
-        cudaFreeHost(buffer);
-        return;
-    }
-    AlignedAllocationPolicy::free(buffer);
-}
+/*! \brief Pin the allocation to physical memory.
+ *
+ * Requires that \c pointer is not nullptr.
+ *
+ * Does not throw.
+ */
+void pinBuffer(void *pointer, std::size_t numBytes) noexcept;
  
  
-HostAllocationPolicy makeHostAllocationPolicyForGpu()
-{
-    return HostAllocationPolicy(HostAllocationPolicy::Impl::AllocateForGpu);
-}
+/*! \brief Unpin the allocation.
+ *
+ * Requries that \c pointer is not nullptr and was previously pinned
+ * with pinBuffer().
+ *
+ * Does not throw.
+ */
+void unpinBuffer(void *pointer) noexcept;
  
  } // namespace gmx
  
  } // namespace gmx
diff --git a/src/gromacs/gpu_utils/tests/devicetransfers.cpp b/src/gromacs/gpu_utils/tests/devicetransfers.cpp

index 039f4a0583a36422cb5e8e4dbf5fb0c181fed3ca..733cac22451e4aeaafeef95f0699248617fb6b64 100644 (file)
--- a/src/gromacs/gpu_utils/tests/devicetransfers.cpp
+++ b/src/gromacs/gpu_utils/tests/devicetransfers.cpp
@@ -42,15 +42,20 @@
  
  #include "devicetransfers.h"
  
  
  #include "devicetransfers.h"
  
+#include <algorithm>
+
  #include "gromacs/utility/arrayref.h"
  
  namespace gmx
  {
  
  #include "gromacs/utility/arrayref.h"
  
  namespace gmx
  {
  
-void doDeviceTransfers(const gmx_gpu_info_t & /*gpuInfo*/,
-                       ArrayRef<const char>   /*input*/,
-                       ArrayRef<char>         /* output */)
+void doDeviceTransfers(const gmx_gpu_info_t   & /*gpuInfo*/,
+                       ArrayRef<const char>   input,
+                       ArrayRef<char>         output)
  {
  {
+    GMX_RELEASE_ASSERT(input.size() == output.size(), "Input and output must have matching size");
+    // We can't have any valid GPUs for this build configuration.
+    std::copy(input.begin(), input.end(), output.begin());
  }
  
  } // namespace gmx
  }
  
  } // namespace gmx
diff --git a/src/gromacs/gpu_utils/tests/devicetransfers.cu b/src/gromacs/gpu_utils/tests/devicetransfers.cu

index 940712d48913bf48b074879dbd8d0a4964d28035..e2d6a0d2bad543767c380f2642d06d6f5a392ab8 100644 (file)
--- a/src/gromacs/gpu_utils/tests/devicetransfers.cu
+++ b/src/gromacs/gpu_utils/tests/devicetransfers.cu
@@ -79,8 +79,12 @@ void doDeviceTransfers(const gmx_gpu_info_t &gpuInfo,
                         ArrayRef<char>        output)
  {
      GMX_RELEASE_ASSERT(input.size() == output.size(), "Input and output must have matching size");
                         ArrayRef<char>        output)
  {
      GMX_RELEASE_ASSERT(input.size() == output.size(), "Input and output must have matching size");
+    if (gpuInfo.n_dev == 0)
+    {
+        std::copy(input.begin(), input.end(), output.begin());
+        return;
+    }
      cudaError_t status;
      cudaError_t status;
-    GMX_RELEASE_ASSERT(gpuInfo.n_dev > 0, "Must have a GPU device");
  
      const auto &device = gpuInfo.gpu_dev[0];
      int         oldDeviceId;
  
      const auto &device = gpuInfo.gpu_dev[0];
      int         oldDeviceId;
diff --git a/src/gromacs/gpu_utils/tests/devicetransfers.h b/src/gromacs/gpu_utils/tests/devicetransfers.h

index 7ea88a1723cc7f86b861c4a9307cc25741289cd8..9ce7a428154eca51b8f9f7a773e11382c7bf776b 100644 (file)
--- a/src/gromacs/gpu_utils/tests/devicetransfers.h
+++ b/src/gromacs/gpu_utils/tests/devicetransfers.h
@@ -60,8 +60,9 @@ namespace gmx
  
  /*! \brief Helper function for GPU test code to be platform agnostic.
   *
  
  /*! \brief Helper function for GPU test code to be platform agnostic.
   *
- * Transfers \c input to device 0, which must be present, and
- * transfers it back into \c output. Both sizes must match.
+ * Transfers \c input to device 0, if present, and transfers it back
+ * into \c output. Both sizes must match. If no devices are present,
+ * do a simple host-side buffer copy instead.
   *
   * \throws InternalError  Upon any GPU API error condition. */
  void doDeviceTransfers(const gmx_gpu_info_t &gpuInfo,
   *
   * \throws InternalError  Upon any GPU API error condition. */
  void doDeviceTransfers(const gmx_gpu_info_t &gpuInfo,
diff --git a/src/gromacs/gpu_utils/tests/hostallocator.cpp b/src/gromacs/gpu_utils/tests/hostallocator.cpp

index 689987bec79b65217b228af613bbfe8642628c1c..809cce2b5a593292c82126e9493dc83f9943de32 100644 (file)
--- a/src/gromacs/gpu_utils/tests/hostallocator.cpp
+++ b/src/gromacs/gpu_utils/tests/hostallocator.cpp
@@ -42,12 +42,16 @@
  
  #include "gromacs/gpu_utils/hostallocator.h"
  
  
  #include "gromacs/gpu_utils/hostallocator.h"
  
+#include "config.h"
+
  #include <type_traits>
  #include <vector>
  
  #include <gtest/gtest.h>
  
  #include <type_traits>
  #include <vector>
  
  #include <gtest/gtest.h>
  
+#include "gromacs/gpu_utils/gpu_utils.h"
  #include "gromacs/math/vectypes.h"
  #include "gromacs/math/vectypes.h"
+#include "gromacs/utility/arrayref.h"
  #include "gromacs/utility/real.h"
  
  #include "devicetransfers.h"
  #include "gromacs/utility/real.h"
  
  #include "devicetransfers.h"
@@ -59,25 +63,20 @@ namespace gmx
  namespace
  {
  
  namespace
  {
  
-//! The types used in testing.
-typedef ::testing::Types<int, real, RVec> TestTypes;
-
-//! Typed test fixture
+/*! \internal \brief Typed test fixture for infrastructure for
+ * host-side memory used for GPU transfers. */
  template <typename T>
  template <typename T>
-class HostAllocatorTest : public test::GpuTest
+class HostMemoryTest : public test::GpuTest
  {
      public:
          //! Convenience type
          using ValueType = T;
          //! Convenience type
  {
      public:
          //! Convenience type
          using ValueType = T;
          //! Convenience type
-        using AllocatorType = HostAllocator<T>;
-        //! Convenience type
-        using VectorType = std::vector<ValueType, AllocatorType>;
-        //! Convenience type
          using ViewType = ArrayRef<ValueType>;
          //! Convenience type
          using ConstViewType = ArrayRef<const ValueType>;
          //! Prepare contents of a VectorType.
          using ViewType = ArrayRef<ValueType>;
          //! Convenience type
          using ConstViewType = ArrayRef<const ValueType>;
          //! Prepare contents of a VectorType.
+        template <typename VectorType>
          void fillInput(VectorType *input) const;
          //! Compares input and output vectors.
          void compareVectors(ConstViewType input,
          void fillInput(VectorType *input) const;
          //! Compares input and output vectors.
          void compareVectors(ConstViewType input,
@@ -87,25 +86,30 @@ class HostAllocatorTest : public test::GpuTest
  };
  
  // Already documented
  };
  
  // Already documented
-template <typename T>
-void HostAllocatorTest<T>::fillInput(VectorType *input) const
+template <typename T> template <typename VectorType>
+void HostMemoryTest<T>::fillInput(VectorType *input) const
  {
  {
-    input->push_back(1);
-    input->push_back(2);
-    input->push_back(3);
+    input->resize(3);
+    (*input)[0] = 1;
+    (*input)[1] = 2;
+    (*input)[2] = 3;
  }
  
  //! Initialization specialization for RVec
  }
  
  //! Initialization specialization for RVec
-template <>
-void HostAllocatorTest<RVec>::fillInput(VectorType *input) const
+template <> template <typename VectorType>
+void HostMemoryTest<RVec>::fillInput(VectorType *input) const
  {
  {
-    input->push_back({1, 2, 3});
+    input->reserve(3);
+    input->resize(3);
+    (*input)[0] = {1, 2, 3};
+    (*input)[1] = {4, 5, 6};
+    (*input)[2] = {7, 8, 9};
  }
  
  // Already documented
  template <typename T>
  }
  
  // Already documented
  template <typename T>
-void HostAllocatorTest<T>::compareVectors(ConstViewType input,
-                                          ConstViewType output) const
+void HostMemoryTest<T>::compareVectors(ConstViewType input,
+                                       ConstViewType output) const
  {
      for (size_t i = 0; i != input.size(); ++i)
      {
  {
      for (size_t i = 0; i != input.size(); ++i)
      {
@@ -115,8 +119,8 @@ void HostAllocatorTest<T>::compareVectors(ConstViewType input,
  
  //! Comparison specialization for RVec
  template <>
  
  //! Comparison specialization for RVec
  template <>
-void HostAllocatorTest<RVec>::compareVectors(ConstViewType input,
-                                             ConstViewType output) const
+void HostMemoryTest<RVec>::compareVectors(ConstViewType input,
+                                          ConstViewType output) const
  {
      for (size_t i = 0; i != input.size(); ++i)
      {
  {
      for (size_t i = 0; i != input.size(); ++i)
      {
@@ -146,15 +150,8 @@ ArrayRef<char> charArrayRefFromArray(T *data, size_t size)
  }
  
  template <typename T>
  }
  
  template <typename T>
-void HostAllocatorTest<T>::runTest(ConstViewType input, ViewType output) const
+void HostMemoryTest<T>::runTest(ConstViewType input, ViewType output) const
  {
  {
-    // We can't do a test that does a transfer unless we have a
-    // compatible device.
-    if (!this->haveValidGpus())
-    {
-        return;
-    }
-
      // Convert the views of input and output to flat non-const chars,
      // so that there's no templating when we call doDeviceTransfers.
      auto inputRef  = charArrayRefFromArray(input.data(), input.size());
      // Convert the views of input and output to flat non-const chars,
      // so that there's no templating when we call doDeviceTransfers.
      auto inputRef  = charArrayRefFromArray(input.data(), input.size());
@@ -164,6 +161,22 @@ void HostAllocatorTest<T>::runTest(ConstViewType input, ViewType output) const
      this->compareVectors(input, output);
  }
  
      this->compareVectors(input, output);
  }
  
+//! The types used in testing.
+typedef ::testing::Types<int, real, RVec> TestTypes;
+
+//! Typed test fixture
+template <typename T>
+class HostAllocatorTest : public HostMemoryTest<T>
+{
+    public:
+        //! Convenience type
+        using ValueType = T;
+        //! Convenience type
+        using AllocatorType = HostAllocator<T>;
+        //! Convenience type
+        using VectorType = std::vector<ValueType, AllocatorType>;
+};
+
  TYPED_TEST_CASE(HostAllocatorTest, TestTypes);
  
  // Note that in GoogleTest typed tests, the use of TestFixture:: and
  TYPED_TEST_CASE(HostAllocatorTest, TestTypes);
  
  // Note that in GoogleTest typed tests, the use of TestFixture:: and
@@ -178,48 +191,132 @@ TYPED_TEST(HostAllocatorTest, EmptyMemoryAlwaysWorks)
      typename TestFixture::VectorType v;
  }
  
      typename TestFixture::VectorType v;
  }
  
-TYPED_TEST(HostAllocatorTest, TransfersUsingDefaultHostAllocatorWork)
+TYPED_TEST(HostAllocatorTest, VectorsWithDefaultHostAllocatorAlwaysWorks)
  {
      typename TestFixture::VectorType input = {{1, 2, 3}}, output;
      output.resize(input.size());
  {
      typename TestFixture::VectorType input = {{1, 2, 3}}, output;
      output.resize(input.size());
+}
+
+// Several tests actually do CUDA transfers. This is not necessary
+// because the state of page alignment or pinning is not currently
+// relevant to the success of a CUDA transfer. CUDA checks happen only
+// during cudaHostRegister and cudaHostUnregister. Such tests are of
+// value only when this behaviour changes, if ever.
+
+TYPED_TEST(HostAllocatorTest, TransfersWithoutPinningWork)
+{
+    typename TestFixture::VectorType input;
+    this->fillInput(&input);
+    typename TestFixture::VectorType output;
+    output.resize(input.size());
  
      this->runTest(input, output);
  }
  
  
      this->runTest(input, output);
  }
  
-TYPED_TEST(HostAllocatorTest, TransfersUsingNormalCpuHostAllocatorWork)
+TYPED_TEST(HostAllocatorTest, FillInputAlsoWorksAfterCallingReserve)
  {
  {
-    // Make an allocator with a 'normal CPU' allocation policy. This
-    // might be slower than another policy, but still works.
-    using AllocatorType       = typename TestFixture::AllocatorType;
-    using AllocatorPolicyType = typename AllocatorType::allocation_policy;
-    AllocatorPolicyType              policy(AllocatorPolicyType::Impl::AllocateAligned);
-    AllocatorType                    allocator(policy);
+    typename TestFixture::VectorType input;
+    input.reserve(3);
+    this->fillInput(&input);
+}
+
+#if GMX_GPU == GMX_GPU_CUDA
  
  
-    typename TestFixture::VectorType input(allocator);
+// Policy suitable for pinning is only supported for a CUDA build
+
+TYPED_TEST(HostAllocatorTest, TransfersWithPinningWorkWithCuda)
+{
+    typename TestFixture::VectorType input;
+    changePinningPolicy(&input, PinningPolicy::CanBePinned);
      this->fillInput(&input);
      this->fillInput(&input);
-    typename TestFixture::VectorType output(allocator);
+    typename TestFixture::VectorType output;
+    changePinningPolicy(&output, PinningPolicy::CanBePinned);
      output.resize(input.size());
  
      this->runTest(input, output);
  }
  
      output.resize(input.size());
  
      this->runTest(input, output);
  }
  
-TYPED_TEST(HostAllocatorTest, TransfersUsingGpuHostAllocatorWork)
+//! Helper function for wrapping a call to isHostMemoryPinned.
+template <typename VectorType>
+bool isPinned(const VectorType &v)
  {
  {
-    // Make an allocator with a 'for GPU' allocation policy. This
-    // should be more efficient, but we can't test that.
-    using AllocatorType       = typename TestFixture::AllocatorType;
-    using AllocatorPolicyType = typename AllocatorType::allocation_policy;
-    AllocatorPolicyType              policy(AllocatorPolicyType::Impl::AllocateForGpu);
-    AllocatorType                    allocator(policy);
+    void *data = const_cast<void *>(static_cast<const void *>(v.data()));
+    return isHostMemoryPinned(data);
+}
+
+TYPED_TEST(HostAllocatorTest, ManualPinningOperationsWorkWithCuda)
+{
+    typename TestFixture::VectorType input;
+    changePinningPolicy(&input, PinningPolicy::CanBePinned);
+    EXPECT_FALSE(isPinned(input));
+
+    // Unpin before allocation is fine, but does nothing.
+    input.get_allocator().getPolicy().unpin();
+    EXPECT_FALSE(isPinned(input));
  
  
-    typename TestFixture::VectorType input(allocator);
+    // Pin with no contents is fine, but does nothing.
+    input.get_allocator().getPolicy().pin();
+    EXPECT_FALSE(isPinned(input));
+
+    // Fill some contents, which will be pinned because of the policy.
      this->fillInput(&input);
      this->fillInput(&input);
-    typename TestFixture::VectorType output(allocator);
-    output.resize(input.size());
+    EXPECT_TRUE(isPinned(input));
  
  
-    this->runTest(input, output);
+    // Unpin after pin is fine.
+    input.get_allocator().getPolicy().unpin();
+    EXPECT_FALSE(isPinned(input));
+
+    // Repeated unpin should be a no-op.
+    input.get_allocator().getPolicy().unpin();
+
+    // Pin after unpin is fine.
+    input.get_allocator().getPolicy().pin();
+    EXPECT_TRUE(isPinned(input));
+
+    // Repeated pin should be a no-op, and still pinned.
+    input.get_allocator().getPolicy().pin();
+    EXPECT_TRUE(isPinned(input));
+
+    // Switching policy to CannotBePinned must unpin the buffer (via
+    // realloc and copy).
+    auto oldInputData = input.data();
+    changePinningPolicy(&input, PinningPolicy::CannotBePinned);
+    EXPECT_FALSE(isPinned(input));
+    // These cannot be equal as both had to be allocated at the same
+    // time for the contents to be able to be copied.
+    EXPECT_NE(oldInputData, input.data());
+
+    // Switching policy to CanBePinned must pin the buffer (via
+    // realloc and copy).
+    oldInputData = input.data();
+    changePinningPolicy(&input, PinningPolicy::CanBePinned);
+    EXPECT_TRUE(isPinned(input));
+    // These cannot be equal as both had to be allocated at the same
+    // time for the contents to be able to be copied.
+    EXPECT_NE(oldInputData, input.data());
+}
+
+#else
+
+TYPED_TEST(HostAllocatorTest, ChangingPinningPolicyRequiresCuda)
+{
+    typename TestFixture::VectorType input;
+    EXPECT_DEATH(changePinningPolicy(&input, PinningPolicy::CanBePinned),
+                 ".*A suitable build of GROMACS.* is required.*");
  }
  
  }
  
+TYPED_TEST(HostAllocatorTest, ManualPinningOperationsWorkEvenWithoutCuda)
+{
+    typename TestFixture::VectorType input;
+
+    // Since the buffer can't be pinned and isn't pinned, and the
+    // calling code can't be unhappy about this, these are OK.
+    input.get_allocator().getPolicy().pin();
+    input.get_allocator().getPolicy().unpin();
+}
+
+#endif
+
  TYPED_TEST(HostAllocatorTest, StatefulAllocatorUsesMemory)
  {
      // The HostAllocator has state, so a container using it will be
  TYPED_TEST(HostAllocatorTest, StatefulAllocatorUsesMemory)
  {
      // The HostAllocator has state, so a container using it will be
@@ -229,5 +326,16 @@ TYPED_TEST(HostAllocatorTest, StatefulAllocatorUsesMemory)
                sizeof(typename TestFixture::VectorType));
  }
  
                sizeof(typename TestFixture::VectorType));
  }
  
+//! Declare allocator types to test.
+using AllocatorTypesToTest = ::testing::Types<HostAllocator<real>,
+                                              HostAllocator<int>,
+                                              HostAllocator<RVec>
+                                              >;
+
+TYPED_TEST_CASE(AllocatorTest, AllocatorTypesToTest);
+
  } // namespace
  } // namespace
  } // namespace
  } // namespace
+
+// Includes tests common to all allocation policies.
+#include "gromacs/utility/tests/alignedallocator-impl.h"
diff --git a/src/gromacs/mdlib/mdatoms.cpp b/src/gromacs/mdlib/mdatoms.cpp

index 31efd4e7bdb73e504360d91ccb3bb6760161302d..fe998ff9c53c1811ce9a6ef99fd091495aefefe0 100644 (file)
--- a/src/gromacs/mdlib/mdatoms.cpp
+++ b/src/gromacs/mdlib/mdatoms.cpp
@@ -43,6 +43,7 @@
  #include <memory>
  
  #include "gromacs/compat/make_unique.h"
  #include <memory>
  
  #include "gromacs/compat/make_unique.h"
+#include "gromacs/gpu_utils/hostallocator.h"
  #include "gromacs/math/functions.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/qmmm.h"
  #include "gromacs/math/functions.h"
  #include "gromacs/mdlib/gmx_omp_nthreads.h"
  #include "gromacs/mdlib/qmmm.h"
@@ -51,7 +52,6 @@
  #include "gromacs/topology/mtop_lookup.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/topology/topology.h"
  #include "gromacs/topology/mtop_lookup.h"
  #include "gromacs/topology/mtop_util.h"
  #include "gromacs/topology/topology.h"
-#include "gromacs/utility/alignedallocator.h"
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/smalloc.h"
  
  #include "gromacs/utility/exceptions.h"
  #include "gromacs/utility/smalloc.h"
  
@@ -60,8 +60,8 @@
  namespace gmx
  {
  
  namespace gmx
  {
  
-MDAtoms::MDAtoms(HostAllocationPolicy policy)
-    : mdatoms_(nullptr), chargeA_(policy)
+MDAtoms::MDAtoms()
+    : mdatoms_(nullptr), chargeA_()
  {
  }
  
  {
  }
  
@@ -81,10 +81,9 @@ std::unique_ptr<MDAtoms>
  makeMDAtoms(FILE *fp, const gmx_mtop_t &mtop, const t_inputrec &ir,
              bool useGpuForPme)
  {
  makeMDAtoms(FILE *fp, const gmx_mtop_t &mtop, const t_inputrec &ir,
              bool useGpuForPme)
  {
-    auto policy = (useGpuForPme ?
-                   makeHostAllocationPolicyForGpu() :
-                   HostAllocationPolicy());
-    auto       mdAtoms = compat::make_unique<MDAtoms>(policy);
+    auto       mdAtoms = compat::make_unique<MDAtoms>();
+    // GPU transfers want to use the pinning mode.
+    changePinningPolicy(&mdAtoms->chargeA_, useGpuForPme ? PinningPolicy::CanBePinned : PinningPolicy::CannotBePinned);
      t_mdatoms *md;
      snew(md, 1);
      mdAtoms->mdatoms_.reset(md);
      t_mdatoms *md;
      snew(md, 1);
      mdAtoms->mdatoms_.reset(md);
diff --git a/src/gromacs/mdlib/mdatoms.h b/src/gromacs/mdlib/mdatoms.h

index b537e0df2f6b25c54124752394f05a5d026e14f3..8e708b3e0dde55f4cead60c7ae9b1101d44f26a4 100644 (file)
--- a/src/gromacs/mdlib/mdatoms.h
+++ b/src/gromacs/mdlib/mdatoms.h
@@ -69,11 +69,11 @@ class MDAtoms
      //! C-style mdatoms struct.
      unique_cptr<t_mdatoms> mdatoms_;
      //! Memory for chargeA that can be set up for efficient GPU transfer.
      //! C-style mdatoms struct.
      unique_cptr<t_mdatoms> mdatoms_;
      //! Memory for chargeA that can be set up for efficient GPU transfer.
-    std::vector < real, HostAllocator < real>> chargeA_;
+    HostVector<real>       chargeA_;
      public:
          // TODO make this private
          //! Constructor.
      public:
          // TODO make this private
          //! Constructor.
-        MDAtoms(HostAllocationPolicy policy);
+        MDAtoms();
          //! Getter.
          t_mdatoms *mdatoms()
          {
          //! Getter.
          t_mdatoms *mdatoms()
          {
diff --git a/src/gromacs/utility/allocator.h b/src/gromacs/utility/allocator.h

index 50ee1f0834ef4960065a1acf0b777d3d5df6a323..3d243adb7142b2aed977372fae54dc904342f733 100644 (file)
--- a/src/gromacs/utility/allocator.h
+++ b/src/gromacs/utility/allocator.h
@@ -201,6 +201,12 @@ class Allocator : public AllocationPolicy
              AllocationPolicy::free(p);
          }
  
              AllocationPolicy::free(p);
          }
  
+        //! Return the policy object for this allocator.
+        AllocationPolicy getPolicy() const
+        {
+            return *this;
+        }
+
          /*! \brief Construct an object without allocating memory
           *
           * \tparam Args  Variable-length list of types for constructor args
          /*! \brief Construct an object without allocating memory
           *
           * \tparam Args  Variable-length list of types for constructor args
diff --git a/src/gromacs/utility/tests/alignedallocator-impl.h b/src/gromacs/utility/tests/alignedallocator-impl.h

new file mode 100644 (file)

index 0000000..d592288
--- /dev/null
+++ b/src/gromacs/utility/tests/alignedallocator-impl.h
@@ -0,0 +1,129 @@
+/*
+ * This file is part of the GROMACS molecular simulation package.
+ *
+ * Copyright (c) 2015,2017, by the GROMACS development team, led by
+ * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
+ * and including many others, as listed in the AUTHORS file in the
+ * top-level source directory and at http://www.gromacs.org.
+ *
+ * GROMACS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1
+ * of the License, or (at your option) any later version.
+ *
+ * GROMACS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GROMACS; if not, see
+ * http://www.gnu.org/licenses, or write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
+ *
+ * If you want to redistribute modifications to GROMACS, please
+ * consider that scientific software is very special. Version
+ * control is crucial - bugs must be traceable. We will be happy to
+ * consider code for inclusion in the official distribution, but
+ * derived work must not be called official GROMACS. Details are found
+ * in the README & COPYING files - if they are missing, get the
+ * official version at http://www.gromacs.org.
+ *
+ * To help us fund GROMACS development, we humbly ask that you cite
+ * the research papers on the package. Check out http://www.gromacs.org.
+ */
+/*! \libinternal \file
+ * \brief Tests for allocators that offer a minimum alignment.
+ *
+ * This implementation header can be included in multiple modules
+ * tests, which is currently needed because gpu_utils is physically
+ * separate from the utility module.
+ *
+ * \author Erik Lindahl <erik.lindahl@gmail.com>
+ * \author Mark Abraham <mark.j.abraham@gmail.com>
+ * \inlibraryapi
+ * \ingroup module_utility
+ */
+#ifndef GMX_UTILITY_TESTS_ALIGNEDALLOCATOR_IMPL_H
+#define GMX_UTILITY_TESTS_ALIGNEDALLOCATOR_IMPL_H
+
+#include <cstddef>
+
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "gromacs/math/vectypes.h"
+#include "gromacs/utility/real.h"
+
+namespace gmx
+{
+namespace test
+{
+
+/*! \libinternal
+ * \brief Templated test fixture. */
+template <typename T>
+class AllocatorTest : public ::testing::Test
+{
+    public:
+        /*! \brief Return a bitmask for testing the alignment.
+         *
+         * e.g. for 128-byte alignment the mask is 128-1 - all of
+         * these bits should be zero in pointers that have the
+         * intended alignment. */
+        std::size_t mask(const T &allocator)
+        {
+            return allocator.getPolicy().alignment() - 1;
+        }
+};
+
+// NB need to use this->mask() because of GoogleTest quirks
+
+TYPED_TEST(AllocatorTest, AllocatorAlignAllocatesWithAlignment)
+{
+    using pointer = typename TypeParam::pointer;
+    TypeParam a;
+    pointer   p = a.allocate(1000);
+
+    EXPECT_EQ(0, reinterpret_cast<std::size_t>(p) & this->mask(a));
+    a.deallocate(p, 1000);
+}
+
+
+TYPED_TEST(AllocatorTest, VectorAllocatesAndResizesWithAlignment)
+{
+    using value_type = typename TypeParam::value_type;
+    std::vector<value_type, TypeParam> v(10);
+    EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask(v.get_allocator()));
+
+    // Reserve a few times to check things work ok, making sure we
+    // will trigger several reallocations on common vector
+    // implementations.
+    for (std::size_t i = 1000; i <= 10000; i += 1000)
+    {
+        v.resize(i);
+        EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask(v.get_allocator()));
+    }
+}
+
+TYPED_TEST(AllocatorTest, VectorAllocatesAndReservesWithAlignment)
+{
+    using value_type = typename TypeParam::value_type;
+    std::vector<value_type, TypeParam> v(10);
+    EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask(v.get_allocator()));
+
+    // Reserve a few times to check things work ok, making sure we
+    // will trigger several reallocations on common vector
+    // implementations.
+    for (std::size_t i = 1000; i <= 10000; i += 1000)
+    {
+        v.reserve(i);
+        EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask(v.get_allocator()));
+    }
+}
+
+} // namespace
+} // namespace
+
+#endif
diff --git a/src/gromacs/utility/tests/alignedallocator.cpp b/src/gromacs/utility/tests/alignedallocator.cpp

index 09dc6101574d3e5ccd170c78ccb12a88074b2e98..11a695336f61262feb7242c12c7c085e360ed0d4 100644 (file)
--- a/src/gromacs/utility/tests/alignedallocator.cpp
+++ b/src/gromacs/utility/tests/alignedallocator.cpp
@@ -33,7 +33,7 @@
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \internal \file
   * the research papers on the package. Check out http://www.gromacs.org.
   */
  /*! \internal \file
- * \brief Tests for gmx::AlignedAllocator
+ * \brief Tests for gmx::AlignedAllocator and gmx::PageAlignedAllocator.
   *
   * \author Erik Lindahl <erik.lindahl@gmail.com>
   * \author Mark Abraham <mark.j.abraham@gmail.com>
   *
   * \author Erik Lindahl <erik.lindahl@gmail.com>
   * \author Mark Abraham <mark.j.abraham@gmail.com>
@@ -49,23 +49,11 @@
  #include <gtest/gtest.h>
  
  #include "gromacs/math/vectypes.h"
  #include <gtest/gtest.h>
  
  #include "gromacs/math/vectypes.h"
-#include "gromacs/utility/real.h"
  
  namespace gmx
  {
  
  namespace gmx
  {
-
-//! Templated test fixture.
-template <typename T>
-class AllocatorTest : public ::testing::Test
+namespace test
  {
  {
-    public:
-        /*! \brief Bitmask for testing the alignment.
-         *
-         * e.g. for 128-byte alignment the mask is 128-1 - all of
-         * these bits should be zero in pointers that have the
-         * intended alignment. */
-        std::size_t mask_ = T::allocation_policy::alignment()-1;
-};
  
  //! Declare allocator types to test.
  using AllocatorTypesToTest = ::testing::Types<AlignedAllocator<real>,
  
  //! Declare allocator types to test.
  using AllocatorTypesToTest = ::testing::Types<AlignedAllocator<real>,
@@ -75,52 +63,19 @@ using AllocatorTypesToTest = ::testing::Types<AlignedAllocator<real>,
                                                AlignedAllocator<RVec>,
                                                PageAlignedAllocator<RVec>
                                                >;
                                                AlignedAllocator<RVec>,
                                                PageAlignedAllocator<RVec>
                                                >;
-TYPED_TEST_CASE(AllocatorTest, AllocatorTypesToTest);
  
  
-// NB need to use this->mask_ because of GoogleTest quirks
-
-TYPED_TEST(AllocatorTest, AllocatorAlignAllocatesWithAlignment)
-{
-    using pointer = typename TypeParam::pointer;
-    TypeParam a;
-    pointer   p = a.allocate(1000);
+TYPED_TEST_CASE(AllocatorTest, AllocatorTypesToTest);
  
  
-    EXPECT_EQ(0, reinterpret_cast<std::size_t>(p) & this->mask_);
-    a.deallocate(p, 1000);
-}
+} // namespace
+} // namespace
  
  
+// Includes tests common to all allocation policies.
+#include "gromacs/utility/tests/alignedallocator-impl.h"
  
  
-TYPED_TEST(AllocatorTest, VectorAllocatesAndResizesWithAlignment)
+namespace gmx
  {
  {
-    using value_type = typename TypeParam::value_type;
-    std::vector<value_type, TypeParam> v(10);
-    EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask_);
-
-    // Reserve a few times to check things work ok, making sure we
-    // will trigger several reallocations on common vector
-    // implementations.
-    for (std::size_t i = 1000; i <= 10000; i += 1000)
-    {
-        v.resize(i);
-        EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask_);
-    }
-}
-
-TYPED_TEST(AllocatorTest, VectorAllocatesAndReservesWithAlignment)
+namespace test
  {
  {
-    using value_type = typename TypeParam::value_type;
-    std::vector<value_type, TypeParam> v(10);
-    EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask_);
-
-    // Reserve a few times to check things work ok, making sure we
-    // will trigger several reallocations on common vector
-    // implementations.
-    for (std::size_t i = 1000; i <= 10000; i += 1000)
-    {
-        v.reserve(i);
-        EXPECT_EQ(0, reinterpret_cast<std::size_t>(v.data()) & this->mask_);
-    }
-}
  
  TYPED_TEST(AllocatorTest, StatelessAllocatorUsesNoMemory)
  {
  
  TYPED_TEST(AllocatorTest, StatelessAllocatorUsesNoMemory)
  {
@@ -129,4 +84,5 @@ TYPED_TEST(AllocatorTest, StatelessAllocatorUsesNoMemory)
                sizeof(std::vector<value_type, TypeParam>));
  }
  
                sizeof(std::vector<value_type, TypeParam>));
  }
  
-}
+} // namespace
+} // namespace
author	Mark Abraham <mark.j.abraham@gmail.com>
	Sat, 11 Nov 2017 19:50:20 +0000 (12:50 -0700)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Tue, 21 Nov 2017 06:34:56 +0000 (07:34 +0100)
src/gromacs/gpu_utils/CMakeLists.txt		patch \| blob \| history
src/gromacs/gpu_utils/cudautils.cuh		patch \| blob \| history
src/gromacs/gpu_utils/hostallocator.cpp		patch \| blob \| history
src/gromacs/gpu_utils/hostallocator.h		patch \| blob \| history
src/gromacs/gpu_utils/pinning.cu	[new file with mode: 0644]	patch \| blob
src/gromacs/gpu_utils/pinning.h	[moved from src/gromacs/gpu_utils/hostallocator.cu with 55% similarity]	patch \| blob \| history
src/gromacs/gpu_utils/tests/devicetransfers.cpp		patch \| blob \| history
src/gromacs/gpu_utils/tests/devicetransfers.cu		patch \| blob \| history
src/gromacs/gpu_utils/tests/devicetransfers.h		patch \| blob \| history
src/gromacs/gpu_utils/tests/hostallocator.cpp		patch \| blob \| history
src/gromacs/mdlib/mdatoms.cpp		patch \| blob \| history
src/gromacs/mdlib/mdatoms.h		patch \| blob \| history
src/gromacs/utility/allocator.h		patch \| blob \| history
src/gromacs/utility/tests/alignedallocator-impl.h	[new file with mode: 0644]	patch \| blob
src/gromacs/utility/tests/alignedallocator.cpp		patch \| blob \| history