Use DeviceBuffer in GPU update and NBNXM code

author Andrey Alekseenko <al42and@gmail.com>

Thu, 11 Mar 2021 10:48:06 +0000 (13:48 +0300)

committer Szilárd Páll <pall.szilard@gmail.com>

Wed, 17 Mar 2021 18:22:29 +0000 (18:22 +0000)
author Andrey Alekseenko <al42and@gmail.com>
Thu, 11 Mar 2021 10:48:06 +0000 (13:48 +0300)
committer Szilárd Páll <pall.szilard@gmail.com>
Wed, 17 Mar 2021 18:22:29 +0000 (18:22 +0000)
diff --git a/src/gromacs/gpu_utils/gputraits.h b/src/gromacs/gpu_utils/gputraits.h

index 38c5edf8a90d0b87bcaf6a2fd48065bd4c4a26b6..344b0427c1ae335fd03a69e890e1dd974e9d3ec8 100644 (file)
--- a/src/gromacs/gpu_utils/gputraits.h
+++ b/src/gromacs/gpu_utils/gputraits.h
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -66,6 +66,56 @@ using DeviceTexture = void*;
  //! \brief Single GPU call timing event
  using CommandEvent = void*;
  
+// Stubs for CPU-only build. Might be changed in #3312.
+struct Float2
+{
+};
+struct Float3
+{
+};
+struct Float4
+{
+};
+
  #endif // GMX_GPU
  
+namespace gmx
+{
+template<typename T>
+static inline Float3* asGenericFloat3Pointer(T* in)
+{
+    static_assert(sizeof(T) == sizeof(Float3),
+                  "Size of the host-side data-type is different from the size of the generic "
+                  "device-side counterpart.");
+    return reinterpret_cast<Float3*>(in);
+}
+
+template<typename T>
+static inline const Float3* asGenericFloat3Pointer(const T* in)
+{
+    static_assert(sizeof(T) == sizeof(Float3),
+                  "Size of the host-side data-type is different from the size of the generic "
+                  "device-side counterpart.");
+    return reinterpret_cast<const Float3*>(in);
+}
+
+template<typename C>
+static inline Float3* asGenericFloat3Pointer(C& in)
+{
+    static_assert(sizeof(*in.data()) == sizeof(Float3),
+                  "Size of the host-side data-type is different from the size of the device-side "
+                  "counterpart.");
+    return reinterpret_cast<Float3*>(in.data());
+}
+
+template<typename C>
+static inline const Float3* asGenericFloat3Pointer(const C& in)
+{
+    static_assert(sizeof(*in.data()) == sizeof(Float3),
+                  "Size of the host-side data-type is different from the size of the device-side "
+                  "counterpart.");
+    return reinterpret_cast<const Float3*>(in.data());
+}
+} // namespace gmx
+
  #endif // GMX_GPU_UTILS_GPUTRAITS_H
diff --git a/src/gromacs/gpu_utils/typecasts.cuh b/src/gromacs/gpu_utils/typecasts.cuh

index 1dd63b719313c673246366c3b66b62e937b0f2ef..d98c5877037391d248f7d80ecc5ac444d6c4674b 100644 (file)
--- a/src/gromacs/gpu_utils/typecasts.cuh
+++ b/src/gromacs/gpu_utils/typecasts.cuh
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2020, by the GROMACS development team, led by
+ * Copyright (c) 2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -60,4 +60,25 @@ static inline __host__ __device__ float3* asFloat3(gmx::RVec* in)
      return reinterpret_cast<float3*>(in);
  }
  
+/*! \brief Cast pointer RVec buffer to a pointer to float3 buffer.
+ *
+ * \param[in] in The Pointer to RVec buffer to cast.
+ *
+ * \returns Buffer pointer, casted to float3*.
+ */
+static inline __host__ __device__ float3** asFloat3Pointer(gmx::RVec** in)
+{
+    static_assert(sizeof((*in)[0]) == sizeof(float3),
+                  "Size of the host-side data-type is different from the size of the device-side "
+                  "counterpart.");
+    return reinterpret_cast<float3**>(in);
+}
+static inline __host__ __device__ const float3* const* asFloat3Pointer(const gmx::RVec* const* in)
+{
+    static_assert(sizeof((*in)[0]) == sizeof(float3),
+                  "Size of the host-side data-type is different from the size of the device-side "
+                  "counterpart.");
+    return reinterpret_cast<const float3* const*>(in);
+}
+
  #endif // GMX_GPU_UTILS_TYPECASTS_CUH
diff --git a/src/gromacs/mdlib/gpuforcereduction.h b/src/gromacs/mdlib/gpuforcereduction.h

index 157c4c7eca5a2be662f7a47ce7dc4d287803fe51..b23df660ed3fc1fb5ecdce731463e304c6538ac7 100644 (file)
--- a/src/gromacs/mdlib/gpuforcereduction.h
+++ b/src/gromacs/mdlib/gpuforcereduction.h
@@ -86,7 +86,7 @@ public:
       *
       * \param [in] forcePtr  Pointer to force to be reduced
       */
-    void registerNbnxmForce(void* forcePtr);
+    void registerNbnxmForce(DeviceBuffer<RVec> forcePtr);
  
      /*! \brief Register a rvec-format force to be reduced
       *
diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cpp b/src/gromacs/mdlib/gpuforcereduction_impl.cpp

index 1e0a30b2b6aba01c90831eaba7661d7866255e80..b431fbad495c950c63cd568c287cf3305810ac52 100644 (file)
--- a/src/gromacs/mdlib/gpuforcereduction_impl.cpp
+++ b/src/gromacs/mdlib/gpuforcereduction_impl.cpp
@@ -76,7 +76,7 @@ void GpuForceReduction::reinit(DeviceBuffer<RVec> /*baseForcePtr*/,
  }
  
  // NOLINTNEXTLINE readability-convert-member-functions-to-static
-void GpuForceReduction::registerNbnxmForce(void* /* forcePtr */)
+void GpuForceReduction::registerNbnxmForce(DeviceBuffer<RVec> /* forcePtr */)
  {
      GMX_ASSERT(false, "A CPU stub has been called instead of the correct implementation.");
  }
diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cu b/src/gromacs/mdlib/gpuforcereduction_impl.cu

index 6e1e7e920a50b2b91e8e2d26149beee45c6d1380..dab7d4da0cbd8c9938a85766211d47d7a2ce9938 100644 (file)
--- a/src/gromacs/mdlib/gpuforcereduction_impl.cu
+++ b/src/gromacs/mdlib/gpuforcereduction_impl.cu
@@ -218,9 +218,9 @@ GpuForceReduction::GpuForceReduction(const DeviceContext& deviceContext,
  {
  }
  
-void GpuForceReduction::registerNbnxmForce(void* forcePtr)
+void GpuForceReduction::registerNbnxmForce(DeviceBuffer<Float3> forcePtr)
  {
-    impl_->registerNbnxmForce(reinterpret_cast<DeviceBuffer<RVec>>(forcePtr));
+    impl_->registerNbnxmForce(forcePtr);
  }
  
  void GpuForceReduction::registerRvecForce(void* forcePtr)
diff --git a/src/gromacs/mdlib/gpuforcereduction_impl.cuh b/src/gromacs/mdlib/gpuforcereduction_impl.cuh

index bd222e40a643a01296b1ec222b9f25204af3c3d8..c7d9493c821129515e44db0cb811ec3b3d6aef61 100644 (file)
--- a/src/gromacs/mdlib/gpuforcereduction_impl.cuh
+++ b/src/gromacs/mdlib/gpuforcereduction_impl.cuh
@@ -45,6 +45,7 @@
  
  #include "gromacs/gpu_utils/device_stream.h"
  #include "gromacs/gpu_utils/devicebuffer_datatype.h"
+#include "gromacs/gpu_utils/gputraits.h"
  #include "gromacs/math/vectypes.h"
  
  #include "gpuforcereduction.h"
@@ -82,13 +83,13 @@ public:
       *
       * \param [in] forcePtr  Pointer to force to be reduced
       */
-    void registerNbnxmForce(DeviceBuffer<RVec> forcePtr);
+    void registerNbnxmForce(DeviceBuffer<Float3> forcePtr);
  
      /*! \brief Register a rvec-format force to be reduced
       *
       * \param [in] forcePtr  Pointer to force to be reduced
       */
-    void registerRvecForce(DeviceBuffer<RVec> forcePtr);
+    void registerRvecForce(DeviceBuffer<Float3> forcePtr);
  
      /*! \brief Add a dependency for this force reduction
       *
diff --git a/src/gromacs/mdlib/leapfrog_gpu.cu b/src/gromacs/mdlib/leapfrog_gpu.cu

index 2f5b589870120960eb1b2c53e2c9f3393f1d06ff..75b1026cfc05059ad38a9e42ca68b365e4dabd70 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu.cu
+++ b/src/gromacs/mdlib/leapfrog_gpu.cu
@@ -57,6 +57,7 @@
  
  #include "gromacs/gpu_utils/cudautils.cuh"
  #include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
  #include "gromacs/gpu_utils/vectype_ops.cuh"
  #include "gromacs/math/vec.h"
  #include "gromacs/mdtypes/group.h"
@@ -237,10 +238,10 @@ inline auto selectLeapFrogKernelPtr(bool                doTemperatureScaling,
      return kernelPtr;
  }
  
-void LeapFrogGpu::integrate(const DeviceBuffer<float3>        d_x,
-                            DeviceBuffer<float3>              d_xp,
-                            DeviceBuffer<float3>              d_v,
-                            const DeviceBuffer<float3>        d_f,
+void LeapFrogGpu::integrate(DeviceBuffer<Float3>              d_x,
+                            DeviceBuffer<Float3>              d_xp,
+                            DeviceBuffer<Float3>              d_v,
+                            const DeviceBuffer<Float3>        d_f,
                              const real                        dt,
                              const bool                        doTemperatureScaling,
                              gmx::ArrayRef<const t_grp_tcstat> tcstat,
@@ -283,20 +284,22 @@ void LeapFrogGpu::integrate(const DeviceBuffer<float3>        d_x,
                         "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
                         "in GPU version of Leap-Frog integrator.");
              prVelocityScalingMatrixDiagonal_ =
-                    make_float3(dtPressureCouple * prVelocityScalingMatrix[XX][XX],
-                                dtPressureCouple * prVelocityScalingMatrix[YY][YY],
-                                dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ]);
+                    Float3{ dtPressureCouple * prVelocityScalingMatrix[XX][XX],
+                            dtPressureCouple * prVelocityScalingMatrix[YY][YY],
+                            dtPressureCouple * prVelocityScalingMatrix[ZZ][ZZ] };
          }
          kernelPtr = selectLeapFrogKernelPtr(doTemperatureScaling, numTempScaleValues_, prVelocityScalingType);
      }
  
+    // Checking the buffer types against the kernel argument types
+    static_assert(sizeof(*d_inverseMasses_) == sizeof(float));
      const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
                                                        kernelLaunchConfig_,
                                                        &numAtoms_,
-                                                      &d_x,
-                                                      &d_xp,
-                                                      &d_v,
-                                                      &d_f,
+                                                      asFloat3Pointer(&d_x),
+                                                      asFloat3Pointer(&d_xp),
+                                                      asFloat3Pointer(&d_v),
+                                                      asFloat3Pointer(&d_f),
                                                        &d_inverseMasses_,
                                                        &dt,
                                                        &d_lambdas_,
@@ -345,7 +348,7 @@ void LeapFrogGpu::set(const int numAtoms, const real* inverseMasses, const unsig
      reallocateDeviceBuffer(
              &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
      copyToDeviceBuffer(
-            &d_inverseMasses_, (float*)inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
+            &d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
  
      // Temperature scale group map only used if there are more then one group
      if (numTempScaleValues_ > 1)
diff --git a/src/gromacs/mdlib/leapfrog_gpu.h b/src/gromacs/mdlib/leapfrog_gpu.h

index d7c77ff756f2ec8150e906a0e6ebe1233e31df28..738437aa0dd908be4107bc509c4efc4c8359ffe7 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu.h
+++ b/src/gromacs/mdlib/leapfrog_gpu.h
@@ -47,20 +47,16 @@
  #include "config.h"
  
  #if GMX_GPU_CUDA
-#    include "gromacs/gpu_utils/devicebuffer.cuh"
  #    include "gromacs/gpu_utils/gputraits.cuh"
  #endif
  #if GMX_GPU_SYCL
-#    include "gromacs/gpu_utils/devicebuffer_sycl.h"
  #    include "gromacs/gpu_utils/gputraits_sycl.h"
-using float3 = Float3;
  #endif
  
  #include <memory>
  
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/gpu_utils/hostallocator.h"
-#include "gromacs/pbcutil/pbc.h"
-#include "gromacs/pbcutil/pbc_aiuc.h"
  #include "gromacs/utility/arrayref.h"
  
  class DeviceContext;
@@ -125,10 +121,10 @@ public:
       * \param[in]     dtPressureCouple         Period between pressure coupling steps
       * \param[in]     prVelocityScalingMatrix  Parrinello-Rahman velocity scaling matrix
       */
-    void integrate(const DeviceBuffer<float3>        d_x,
-                   DeviceBuffer<float3>              d_xp,
-                   DeviceBuffer<float3>              d_v,
-                   const DeviceBuffer<float3>        d_f,
+    void integrate(DeviceBuffer<Float3>              d_x,
+                   DeviceBuffer<Float3>              d_xp,
+                   DeviceBuffer<Float3>              d_v,
+                   const DeviceBuffer<Float3>        d_f,
                     const real                        dt,
                     const bool                        doTemperatureScaling,
                     gmx::ArrayRef<const t_grp_tcstat> tcstat,
@@ -192,7 +188,7 @@ private:
      int numTempScaleGroupsAlloc_ = -1;
  
      //! Vector with diagonal elements of the Parrinello-Rahman pressure coupling velocity rescale factors
-    float3 prVelocityScalingMatrixDiagonal_;
+    Float3 prVelocityScalingMatrixDiagonal_;
  };
  
  } // namespace gmx
diff --git a/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp b/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp

index 9afb0320fb23d3c72f8e0e4878a3a53169e0011e..b0e2583a3f33a5dc16b267db2e3c7d43fcc2f021 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp
+++ b/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp
@@ -84,15 +84,15 @@ using cl::sycl::access::mode;
  template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
  auto leapFrogKernel(
          cl::sycl::handler&                          cgh,
-        DeviceAccessor<float3, mode::read_write>    a_x,
-        DeviceAccessor<float3, mode::discard_write> a_xp,
-        DeviceAccessor<float3, mode::read_write>    a_v,
-        DeviceAccessor<float3, mode::read>          a_f,
+        DeviceAccessor<Float3, mode::read_write>    a_x,
+        DeviceAccessor<Float3, mode::discard_write> a_xp,
+        DeviceAccessor<Float3, mode::read_write>    a_v,
+        DeviceAccessor<Float3, mode::read>          a_f,
          DeviceAccessor<float, mode::read>           a_inverseMasses,
          float                                       dt,
          OptionalAccessor<float, mode::read, numTempScaleValues != NumTempScaleValues::None> a_lambdas,
          OptionalAccessor<unsigned short, mode::read, numTempScaleValues == NumTempScaleValues::Multiple> a_tempScaleGroups,
-        float3 prVelocityScalingMatrixDiagonal)
+        Float3 prVelocityScalingMatrixDiagonal)
  {
      cgh.require(a_x);
      cgh.require(a_xp);
@@ -109,9 +109,9 @@ auto leapFrogKernel(
      }
  
      return [=](cl::sycl::id<1> itemIdx) {
-        const float3 x    = a_x[itemIdx];
-        const float3 v    = a_v[itemIdx];
-        const float3 f    = a_f[itemIdx];
+        const Float3 x    = a_x[itemIdx];
+        const Float3 v    = a_v[itemIdx];
+        const Float3 f    = a_f[itemIdx];
          const float  im   = a_inverseMasses[itemIdx];
          const float  imdt = im * dt;
  
@@ -137,20 +137,20 @@ auto leapFrogKernel(
              }
          }();
  
-        const float3 prVelocityDelta = [=]() {
+        const Float3 prVelocityDelta = [=]() {
              if constexpr (velocityScaling == VelocityScalingType::Diagonal)
              {
-                return float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
+                return Float3{ prVelocityScalingMatrixDiagonal[0] * v[0],
                                 prVelocityScalingMatrixDiagonal[1] * v[1],
                                 prVelocityScalingMatrixDiagonal[2] * v[2] };
              }
              else if constexpr (velocityScaling == VelocityScalingType::None)
              {
-                return float3{ 0, 0, 0 };
+                return Float3{ 0, 0, 0 };
              }
          }();
  
-        const float3 v_new = v * lambda - prVelocityDelta + f * imdt;
+        const Float3 v_new = v * lambda - prVelocityDelta + f * imdt;
          a_v[itemIdx]       = v_new;
          a_x[itemIdx]       = x + v_new * dt;
      };
@@ -216,10 +216,10 @@ static inline cl::sycl::event launchLeapFrogKernel(NumTempScaleValues  tempScali
              prVelocityScalingType);
  }
  
-void LeapFrogGpu::integrate(DeviceBuffer<float3>              d_x,
-                            DeviceBuffer<float3>              d_xp,
-                            DeviceBuffer<float3>              d_v,
-                            DeviceBuffer<float3>              d_f,
+void LeapFrogGpu::integrate(DeviceBuffer<Float3>              d_x,
+                            DeviceBuffer<Float3>              d_xp,
+                            DeviceBuffer<Float3>              d_v,
+                            DeviceBuffer<Float3>              d_f,
                              const real                        dt,
                              const bool                        doTemperatureScaling,
                              gmx::ArrayRef<const t_grp_tcstat> tcstat,
@@ -253,7 +253,7 @@ void LeapFrogGpu::integrate(DeviceBuffer<float3>              d_x,
                     "Fully anisotropic Parrinello-Rahman pressure coupling is not yet supported "
                     "in GPU version of Leap-Frog integrator.");
          prVelocityScalingMatrixDiagonal_ = dtPressureCouple
-                                           * float3{ prVelocityScalingMatrix[XX][XX],
+                                           * Float3{ prVelocityScalingMatrix[XX][XX],
                                                       prVelocityScalingMatrix[YY][YY],
                                                       prVelocityScalingMatrix[ZZ][ZZ] };
      }
diff --git a/src/gromacs/mdlib/lincs_gpu.cu b/src/gromacs/mdlib/lincs_gpu.cu

index 0967c20781b0470c595c3274d59f7eca877ad852..466c250f4c72eccba7c220f5d2da8b0faa68aa92 100644 (file)
--- a/src/gromacs/mdlib/lincs_gpu.cu
+++ b/src/gromacs/mdlib/lincs_gpu.cu
@@ -59,7 +59,8 @@
  #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
  #include "gromacs/gpu_utils/cudautils.cuh"
  #include "gromacs/gpu_utils/devicebuffer.cuh"
-#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
  #include "gromacs/gpu_utils/vectype_ops.cuh"
  #include "gromacs/math/functions.h"
  #include "gromacs/math/vec.h"
@@ -427,14 +428,14 @@ inline auto getLincsKernelPtr(const bool updateVelocities, const bool computeVir
      return kernelPtr;
  }
  
-void LincsGpu::apply(const float3* d_x,
-                     float3*       d_xp,
-                     const bool    updateVelocities,
-                     float3*       d_v,
-                     const real    invdt,
-                     const bool    computeVirial,
-                     tensor        virialScaled,
-                     const PbcAiuc pbcAiuc)
+void LincsGpu::apply(const DeviceBuffer<Float3> d_x,
+                     DeviceBuffer<Float3>       d_xp,
+                     const bool                 updateVelocities,
+                     DeviceBuffer<Float3>       d_v,
+                     const real                 invdt,
+                     const bool                 computeVirial,
+                     tensor                     virialScaled,
+                     const PbcAiuc              pbcAiuc)
  {
      ensureNoPendingDeviceError("In CUDA version of LINCS");
  
@@ -479,8 +480,13 @@ void LincsGpu::apply(const float3* d_x,
  
      kernelParams_.pbcAiuc = pbcAiuc;
  
-    const auto kernelArgs =
-            prepareGpuKernelArguments(kernelPtr, config, &kernelParams_, &d_x, &d_xp, &d_v, &invdt);
+    const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
+                                                      config,
+                                                      &kernelParams_,
+                                                      asFloat3Pointer(&d_x),
+                                                      asFloat3Pointer(&d_xp),
+                                                      asFloat3Pointer(&d_v),
+                                                      &invdt);
  
      launchGpuKernel(kernelPtr,
                      config,
diff --git a/src/gromacs/mdlib/lincs_gpu.cuh b/src/gromacs/mdlib/lincs_gpu.cuh

index 40433efcb7a347ca2a4754c220dd7d3b647b90e2..6507892649456b31f11bed4dfd4787de5b5bc6f7 100644 (file)
--- a/src/gromacs/mdlib/lincs_gpu.cuh
+++ b/src/gromacs/mdlib/lincs_gpu.cuh
@@ -46,6 +46,7 @@
  
  #include <memory>
  
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/device_stream.h"
  #include "gromacs/gpu_utils/gputraits.cuh"
@@ -134,14 +135,14 @@ public:
       * \param[in,out] virialScaled      Scaled virial tensor to be updated.
       * \param[in]     pbcAiuc           PBC data.
       */
-    void apply(const float3* d_x,
-               float3*       d_xp,
-               const bool    updateVelocities,
-               float3*       d_v,
-               const real    invdt,
-               const bool    computeVirial,
-               tensor        virialScaled,
-               const PbcAiuc pbcAiuc);
+    void apply(const DeviceBuffer<Float3> d_x,
+               DeviceBuffer<Float3>       d_xp,
+               const bool                 updateVelocities,
+               DeviceBuffer<Float3>       d_v,
+               const real                 invdt,
+               const bool                 computeVirial,
+               tensor                     virialScaled,
+               const PbcAiuc              pbcAiuc);
  
      /*! \brief
       * Update data-structures (e.g. after NB search step).
diff --git a/src/gromacs/mdlib/settle_gpu.cu b/src/gromacs/mdlib/settle_gpu.cu

index eb4b4957724c5e56733f79358f1b4515a208aeba..0cc25c50ce5e28b59a0ad4af7c646927a66b09ab 100644 (file)
--- a/src/gromacs/mdlib/settle_gpu.cu
+++ b/src/gromacs/mdlib/settle_gpu.cu
@@ -59,7 +59,8 @@
  #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
  #include "gromacs/gpu_utils/cudautils.cuh"
  #include "gromacs/gpu_utils/devicebuffer.h"
-#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/gpu_utils/typecasts.cuh"
  #include "gromacs/gpu_utils/vectype_ops.cuh"
  #include "gromacs/math/functions.h"
  #include "gromacs/math/vec.h"
@@ -396,14 +397,14 @@ inline auto getSettleKernelPtr(const bool updateVelocities, const bool computeVi
      return kernelPtr;
  }
  
-void SettleGpu::apply(const float3* d_x,
-                      float3*       d_xp,
-                      const bool    updateVelocities,
-                      float3*       d_v,
-                      const real    invdt,
-                      const bool    computeVirial,
-                      tensor        virialScaled,
-                      const PbcAiuc pbcAiuc)
+void SettleGpu::apply(const DeviceBuffer<Float3> d_x,
+                      DeviceBuffer<Float3>       d_xp,
+                      const bool                 updateVelocities,
+                      DeviceBuffer<Float3>       d_v,
+                      const real                 invdt,
+                      const bool                 computeVirial,
+                      tensor                     virialScaled,
+                      const PbcAiuc              pbcAiuc)
  {
  
      ensureNoPendingDeviceError("In CUDA version SETTLE");
@@ -440,8 +441,17 @@ void SettleGpu::apply(const float3* d_x,
          config.sharedMemorySize = 0;
      }
  
-    const auto kernelArgs = prepareGpuKernelArguments(
-            kernelPtr, config, &numSettles_, &d_atomIds_, &settleParameters_, &d_x, &d_xp, &invdt, &d_v, &d_virialScaled_, &pbcAiuc);
+    const auto kernelArgs = prepareGpuKernelArguments(kernelPtr,
+                                                      config,
+                                                      &numSettles_,
+                                                      &d_atomIds_,
+                                                      &settleParameters_,
+                                                      asFloat3Pointer(&d_x),
+                                                      asFloat3Pointer(&d_xp),
+                                                      &invdt,
+                                                      asFloat3Pointer(&d_v),
+                                                      &d_virialScaled_,
+                                                      &pbcAiuc);
  
      launchGpuKernel(kernelPtr,
                      config,
diff --git a/src/gromacs/mdlib/settle_gpu.cuh b/src/gromacs/mdlib/settle_gpu.cuh

index 3a96ec4d3921bb20ea3f3ea3d60a0a2baf86dd24..f09fbd344f4a37b96c5e21fc7fcebade243fd0c4 100644 (file)
--- a/src/gromacs/mdlib/settle_gpu.cuh
+++ b/src/gromacs/mdlib/settle_gpu.cuh
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -45,9 +45,10 @@
  
  #include "gmxpre.h"
  
+#include "gromacs/gpu_utils/devicebuffer_datatype.h"
  #include "gromacs/gpu_utils/device_context.h"
  #include "gromacs/gpu_utils/device_stream.h"
-#include "gromacs/gpu_utils/gputraits.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
  #include "gromacs/math/functions.h"
  #include "gromacs/math/invertmatrix.h"
  #include "gromacs/math/vec.h"
@@ -101,14 +102,14 @@ public:
       * \param[in,out] virialScaled      Scaled virial tensor to be updated.
       * \param[in]     pbcAiuc           PBC data.
       */
-    void apply(const float3* d_x,
-               float3*       d_xp,
-               const bool    updateVelocities,
-               float3*       d_v,
-               const real    invdt,
-               const bool    computeVirial,
-               tensor        virialScaled,
-               const PbcAiuc pbcAiuc);
+    void apply(const DeviceBuffer<Float3> d_x,
+               DeviceBuffer<Float3>       d_xp,
+               const bool                 updateVelocities,
+               DeviceBuffer<Float3>       d_v,
+               const real                 invdt,
+               const bool                 computeVirial,
+               tensor                     virialScaled,
+               const PbcAiuc              pbcAiuc);
  
      /*! \brief
       * Update data-structures (e.g. after NB search step).
diff --git a/src/gromacs/mdlib/tests/constrtestrunners.cu b/src/gromacs/mdlib/tests/constrtestrunners.cu

index 94be8b35f9be92ba8f11c7437b3681e899f76951..c22b91f351f5943d3891d1b353e33d669743f723 100644 (file)
--- a/src/gromacs/mdlib/tests/constrtestrunners.cu
+++ b/src/gromacs/mdlib/tests/constrtestrunners.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -52,6 +52,7 @@
  #include <vector>
  
  #include "gromacs/gpu_utils/devicebuffer.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
  #include "gromacs/hardware/device_information.h"
  #include "gromacs/mdlib/lincs_gpu.cuh"
  #include "gromacs/pbcutil/pbc.h"
@@ -71,9 +72,14 @@ void LincsDeviceConstraintsRunner::applyConstraints(ConstraintsTestData* testDat
      auto lincsGpu = std::make_unique<LincsGpu>(
              testData->ir_.nLincsIter, testData->ir_.nProjOrder, deviceContext, deviceStream);
  
-    bool    updateVelocities = true;
-    int     numAtoms         = testData->numAtoms_;
-    float3 *d_x, *d_xp, *d_v;
+    bool updateVelocities = true;
+    int  numAtoms         = testData->numAtoms_;
+
+    Float3* h_x  = gmx::asGenericFloat3Pointer(testData->x_);
+    Float3* h_xp = gmx::asGenericFloat3Pointer(testData->xPrime_);
+    Float3* h_v  = gmx::asGenericFloat3Pointer(testData->v_);
+
+    DeviceBuffer<Float3> d_x, d_xp, d_v;
  
      lincsGpu->set(*testData->idef_, testData->numAtoms_, testData->invmass_.data());
      PbcAiuc pbcAiuc;
@@ -83,24 +89,19 @@ void LincsDeviceConstraintsRunner::applyConstraints(ConstraintsTestData* testDat
      allocateDeviceBuffer(&d_xp, numAtoms, deviceContext);
      allocateDeviceBuffer(&d_v, numAtoms, deviceContext);
  
-    copyToDeviceBuffer(
-            &d_x, (float3*)(testData->x_.data()), 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-    copyToDeviceBuffer(
-            &d_xp, (float3*)(testData->xPrime_.data()), 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+    copyToDeviceBuffer(&d_x, h_x, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+    copyToDeviceBuffer(&d_xp, h_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      if (updateVelocities)
      {
-        copyToDeviceBuffer(
-                &d_v, (float3*)(testData->v_.data()), 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+        copyToDeviceBuffer(&d_v, h_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      }
      lincsGpu->apply(
              d_x, d_xp, updateVelocities, d_v, testData->invdt_, testData->computeVirial_, testData->virialScaled_, pbcAiuc);
  
-    copyFromDeviceBuffer(
-            (float3*)(testData->xPrime_.data()), &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+    copyFromDeviceBuffer(h_xp, &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      if (updateVelocities)
      {
-        copyFromDeviceBuffer(
-                (float3*)(testData->v_.data()), &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+        copyFromDeviceBuffer(h_v, &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      }
  
      freeDeviceBuffer(&d_x);
diff --git a/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp b/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp

index b85ed94b52ec3b49193984edf3073fc4b45d1297..97c9c2948125f4cc3b588cdb78506a5d13b02628 100644 (file)
--- a/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp
+++ b/src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp
@@ -59,6 +59,7 @@
  #    include "gromacs/mdlib/leapfrog_gpu.h"
  #endif
  
+#include "gromacs/gpu_utils/gputraits.h"
  #include "gromacs/hardware/device_information.h"
  #include "gromacs/mdlib/stat.h"
  
@@ -76,14 +77,12 @@ void LeapFrogDeviceTestRunner::integrate(LeapFrogTestData* testData, int numStep
  
      int numAtoms = testData->numAtoms_;
  
-    static_assert(sizeof(float3) == sizeof(*testData->x_.data()), "Incompatible types");
+    Float3* h_x  = gmx::asGenericFloat3Pointer(testData->x_);
+    Float3* h_xp = gmx::asGenericFloat3Pointer(testData->xPrime_);
+    Float3* h_v  = gmx::asGenericFloat3Pointer(testData->v_);
+    Float3* h_f  = gmx::asGenericFloat3Pointer(testData->f_);
  
-    float3* h_x  = reinterpret_cast<float3*>(testData->x_.data());
-    float3* h_xp = reinterpret_cast<float3*>(testData->xPrime_.data());
-    float3* h_v  = reinterpret_cast<float3*>(testData->v_.data());
-    float3* h_f  = reinterpret_cast<float3*>(testData->f_.data());
-
-    DeviceBuffer<float3> d_x, d_xp, d_v, d_f;
+    DeviceBuffer<Float3> d_x, d_xp, d_v, d_f;
  
      allocateDeviceBuffer(&d_x, numAtoms, deviceContext);
      allocateDeviceBuffer(&d_xp, numAtoms, deviceContext);
diff --git a/src/gromacs/mdlib/tests/settletestrunners.cu b/src/gromacs/mdlib/tests/settletestrunners.cu

index 0d2a3e116bc92c19852f6af5a2acccf3c0c6276b..7aab750547e6509cdbfadcfbcab24b0a68256e69 100644 (file)
--- a/src/gromacs/mdlib/tests/settletestrunners.cu
+++ b/src/gromacs/mdlib/tests/settletestrunners.cu
@@ -1,7 +1,7 @@
  /*
   * This file is part of the GROMACS molecular simulation package.
   *
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   * and including many others, as listed in the AUTHORS file in the
   * top-level source directory and at http://www.gromacs.org.
@@ -52,6 +52,7 @@
  #include <vector>
  
  #include "gromacs/gpu_utils/devicebuffer.cuh"
+#include "gromacs/gpu_utils/gputraits.h"
  #include "gromacs/hardware/device_information.h"
  #include "gromacs/mdlib/settle_gpu.cuh"
  #include "gromacs/utility/unique_cptr.h"
@@ -85,30 +86,29 @@ void SettleDeviceTestRunner::applySettle(SettleTestData* testData,
  
      int numAtoms = testData->numAtoms_;
  
-    float3 *d_x, *d_xp, *d_v;
+    DeviceBuffer<Float3> d_x, d_xp, d_v;
  
-    float3* h_x  = (float3*)(as_rvec_array(testData->x_.data()));
-    float3* h_xp = (float3*)(as_rvec_array(testData->xPrime_.data()));
-    float3* h_v  = (float3*)(as_rvec_array(testData->v_.data()));
+    Float3* h_x  = gmx::asGenericFloat3Pointer(testData->x_);
+    Float3* h_xp = gmx::asGenericFloat3Pointer(testData->xPrime_);
+    Float3* h_v  = gmx::asGenericFloat3Pointer(testData->v_);
  
      allocateDeviceBuffer(&d_x, numAtoms, deviceContext);
      allocateDeviceBuffer(&d_xp, numAtoms, deviceContext);
      allocateDeviceBuffer(&d_v, numAtoms, deviceContext);
  
-    copyToDeviceBuffer(&d_x, (float3*)h_x, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
-    copyToDeviceBuffer(&d_xp, (float3*)h_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+    copyToDeviceBuffer(&d_x, h_x, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+    copyToDeviceBuffer(&d_xp, h_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      if (updateVelocities)
      {
-        copyToDeviceBuffer(&d_v, (float3*)h_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+        copyToDeviceBuffer(&d_v, h_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      }
      settleGpu->apply(
              d_x, d_xp, updateVelocities, d_v, testData->reciprocalTimeStep_, calcVirial, testData->virial_, pbcAiuc);
  
-    copyFromDeviceBuffer((float3*)h_xp, &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+    copyFromDeviceBuffer(h_xp, &d_xp, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      if (updateVelocities)
      {
-        copyFromDeviceBuffer(
-                (float3*)h_v, &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
+        copyFromDeviceBuffer(h_v, &d_v, 0, numAtoms, deviceStream, GpuApiCallBehavior::Sync, nullptr);
      }
  
      freeDeviceBuffer(&d_x);
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.cu b/src/gromacs/mdlib/update_constrain_gpu_impl.cu

index f03ab778c3951b84a1f83ee7243bc07c96ed5952..b56185651170a6895d88cffe31ac20f3e2760bde 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.cu
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.cu
@@ -240,9 +240,9 @@ UpdateConstrainGpu::Impl::Impl(const t_inputrec&     ir,
  
  UpdateConstrainGpu::Impl::~Impl() {}
  
-void UpdateConstrainGpu::Impl::set(DeviceBuffer<RVec>            d_x,
-                                   DeviceBuffer<RVec>            d_v,
-                                   const DeviceBuffer<RVec>      d_f,
+void UpdateConstrainGpu::Impl::set(DeviceBuffer<Float3>          d_x,
+                                   DeviceBuffer<Float3>          d_v,
+                                   const DeviceBuffer<Float3>    d_f,
                                     const InteractionDefinitions& idef,
                                     const t_mdatoms&              md)
  {
@@ -254,9 +254,9 @@ void UpdateConstrainGpu::Impl::set(DeviceBuffer<RVec>            d_x,
      GMX_ASSERT(d_v != nullptr, "Velocities device buffer should not be null.");
      GMX_ASSERT(d_f != nullptr, "Forces device buffer should not be null.");
  
-    d_x_ = reinterpret_cast<float3*>(d_x);
-    d_v_ = reinterpret_cast<float3*>(d_v);
-    d_f_ = reinterpret_cast<float3*>(d_f);
+    d_x_ = d_x;
+    d_v_ = d_v;
+    d_f_ = d_f;
  
      numAtoms_ = md.nr;
  
@@ -334,9 +334,9 @@ void UpdateConstrainGpu::scaleVelocities(const matrix scalingMatrix)
      impl_->scaleVelocities(scalingMatrix);
  }
  
-void UpdateConstrainGpu::set(DeviceBuffer<RVec>            d_x,
-                             DeviceBuffer<RVec>            d_v,
-                             const DeviceBuffer<RVec>      d_f,
+void UpdateConstrainGpu::set(DeviceBuffer<Float3>          d_x,
+                             DeviceBuffer<Float3>          d_v,
+                             const DeviceBuffer<Float3>    d_f,
                               const InteractionDefinitions& idef,
                               const t_mdatoms&              md)
  {
diff --git a/src/gromacs/mdlib/update_constrain_gpu_impl.h b/src/gromacs/mdlib/update_constrain_gpu_impl.h

index 8e101b8cd0ed51ffb1c73b474c3b000ea09bd138..76e41398e762aaa941d41b24e7968bd0e242421e 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_impl.h
+++ b/src/gromacs/mdlib/update_constrain_gpu_impl.h
@@ -150,9 +150,9 @@ public:
       * \param[in] idef                System topology
       * \param[in] md                  Atoms data.
       */
-    void set(DeviceBuffer<RVec>            d_x,
-             DeviceBuffer<RVec>            d_v,
-             const DeviceBuffer<RVec>      d_f,
+    void set(DeviceBuffer<Float3>          d_x,
+             DeviceBuffer<Float3>          d_v,
+             const DeviceBuffer<Float3>    d_f,
               const InteractionDefinitions& idef,
               const t_mdatoms&              md);
  
@@ -193,14 +193,14 @@ private:
      int numAtoms_;
  
      //! Local copy of the pointer to the device positions buffer
-    float3* d_x_;
+    DeviceBuffer<Float3> d_x_;
      //! Local copy of the pointer to the device velocities buffer
-    float3* d_v_;
+    DeviceBuffer<Float3> d_v_;
      //! Local copy of the pointer to the device forces buffer
-    float3* d_f_;
+    DeviceBuffer<Float3> d_f_;
  
      //! Device buffer for intermediate positions (maintained internally)
-    float3* d_xp_;
+    DeviceBuffer<Float3> d_xp_;
      //! Number of elements in shifted coordinates buffer
      int numXp_ = -1;
      //! Allocation size for the shifted coordinates buffer
@@ -208,7 +208,7 @@ private:
  
  
      //! 1/mass for all atoms (GPU)
-    real* d_inverseMasses_;
+    DeviceBuffer<real> d_inverseMasses_;
      //! Number of elements in reciprocal masses buffer
      int numInverseMasses_ = -1;
      //! Allocation size for the reciprocal masses buffer
diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index bd5fa8de5dcc74499f7722ddd14d46dce4be5fa1..76341b7a9742ac807d5d17f256302d76638b1c52 100644 (file)
--- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
+++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu
@@ -778,7 +778,7 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
      nbnxnInsertNonlocalGpuDependency(nb, interactionLoc);
  }
  
-void* getGpuForces(NbnxmGpu* nb)
+DeviceBuffer<Float3> getGpuForces(NbnxmGpu* nb)
  {
      return nb->atdat->f;
  }
diff --git a/src/gromacs/nbnxm/nbnxm.cpp b/src/gromacs/nbnxm/nbnxm.cpp

index a8e919ea01bdc9ac1d72c126feccbf82b1e42a4c..196befc92d22488042b9dc4c7c6d35834f5d0379 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.cpp
+++ b/src/gromacs/nbnxm/nbnxm.cpp
@@ -213,7 +213,7 @@ int nonbonded_verlet_t::getNumAtoms(const gmx::AtomLocality locality) const
      return numAtoms;
  }
  
-void* nonbonded_verlet_t::getGpuForces() const
+DeviceBuffer<gmx::RVec> nonbonded_verlet_t::getGpuForces() const
  {
      return Nbnxm::getGpuForces(gpu_nbv);
  }
diff --git a/src/gromacs/nbnxm/nbnxm.h b/src/gromacs/nbnxm/nbnxm.h

index 30380ebeb2e5bb715b1b6ff750ace2b5e72705d1..a36efad0233237db92deef65047ea47846db95ec 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm.h
+++ b/src/gromacs/nbnxm/nbnxm.h
@@ -395,7 +395,7 @@ public:
       *
       * \returns A pointer to the force buffer in GPU memory
       */
-    void* getGpuForces() const;
+    DeviceBuffer<gmx::RVec> getGpuForces() const;
  
      //! Return the kernel setup
      const Nbnxm::KernelSetup& kernelSetup() const { return kernelSetup_; }
diff --git a/src/gromacs/nbnxm/nbnxm_gpu.h b/src/gromacs/nbnxm/nbnxm_gpu.h

index fec64bbe68a26af88fa96b339c838db14e807207..ebdba45b9d5358344ada92cdf4d485ee35883126 100644 (file)
--- a/src/gromacs/nbnxm/nbnxm_gpu.h
+++ b/src/gromacs/nbnxm/nbnxm_gpu.h
@@ -313,7 +313,8 @@ void nbnxn_wait_x_on_device(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM;
   * \returns       A pointer to the force buffer in GPU memory
   */
  CUDA_FUNC_QUALIFIER
-void* getGpuForces(NbnxmGpu gmx_unused* nb) CUDA_FUNC_TERM_WITH_RETURN(nullptr);
+DeviceBuffer<gmx::RVec> getGpuForces(NbnxmGpu gmx_unused* nb)
+        CUDA_FUNC_TERM_WITH_RETURN(DeviceBuffer<gmx::RVec>{});
  
  } // namespace Nbnxm
  #endif
author	Andrey Alekseenko <al42and@gmail.com>
	Thu, 11 Mar 2021 10:48:06 +0000 (13:48 +0300)
committer	Szilárd Páll <pall.szilard@gmail.com>
	Wed, 17 Mar 2021 18:22:29 +0000 (18:22 +0000)
src/gromacs/gpu_utils/gputraits.h		patch \| blob \| history
src/gromacs/gpu_utils/typecasts.cuh		patch \| blob \| history
src/gromacs/mdlib/gpuforcereduction.h		patch \| blob \| history
src/gromacs/mdlib/gpuforcereduction_impl.cpp		patch \| blob \| history
src/gromacs/mdlib/gpuforcereduction_impl.cu		patch \| blob \| history
src/gromacs/mdlib/gpuforcereduction_impl.cuh		patch \| blob \| history
src/gromacs/mdlib/leapfrog_gpu.cu		patch \| blob \| history
src/gromacs/mdlib/leapfrog_gpu.h		patch \| blob \| history
src/gromacs/mdlib/leapfrog_gpu_sycl.cpp		patch \| blob \| history
src/gromacs/mdlib/lincs_gpu.cu		patch \| blob \| history
src/gromacs/mdlib/lincs_gpu.cuh		patch \| blob \| history
src/gromacs/mdlib/settle_gpu.cu		patch \| blob \| history
src/gromacs/mdlib/settle_gpu.cuh		patch \| blob \| history
src/gromacs/mdlib/tests/constrtestrunners.cu		patch \| blob \| history
src/gromacs/mdlib/tests/leapfrogtestrunners_gpu.cpp		patch \| blob \| history
src/gromacs/mdlib/tests/settletestrunners.cu		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.cu		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_impl.h		patch \| blob \| history
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.cpp		patch \| blob \| history
src/gromacs/nbnxm/nbnxm.h		patch \| blob \| history
src/gromacs/nbnxm/nbnxm_gpu.h		patch \| blob \| history