namespace gmx
{
+/*! \brief Number of CUDA threads in a block.
+ *
+ * \todo Check if using a smaller block size will lead to better performance.
+ */
+constexpr static int c_threadsPerBlock = 256;
+//! Maximum number of threads in a block (for __launch_bounds__)
+constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
+
+/*! \brief Scaling matrix struct.
+ *
+ * Holds the six entries of a lower-triangular 3x3 scaling matrix
+ * (diagonal xx, yy, zz and off-diagonal yx, zx, zy) in a flat layout
+ * that can be passed by value to the scaling kernel.
+ *
+ * \todo Should be generalized.
+ */
+struct ScalingMatrix
+{
+ float xx, yy, zz, yx, zx, zy;
+};
+
+/*! \brief CUDA kernel that applies a lower-triangular scaling matrix to coordinates.
+ *
+ * Expects a 1D launch with one thread per atom; threads past the end of the
+ * buffer (the grid tail) return immediately. No shared memory is used.
+ *
+ * \param[in]     numAtoms       Total number of atoms to scale.
+ * \param[in,out] gm_x           Device buffer with the coordinates to scale.
+ * \param[in]     scalingMatrix  Lower-triangular scaling matrix (by value).
+ */
+__launch_bounds__(c_maxThreadsPerBlock) __global__
+        void scaleCoordinates_kernel(const int numAtoms, float3* __restrict__ gm_x, const ScalingMatrix scalingMatrix)
+{
+    const int globalIndex = blockIdx.x * blockDim.x + threadIdx.x;
+    if (globalIndex >= numAtoms)
+    {
+        return;
+    }
+    float3 r = gm_x[globalIndex];
+
+    r.x = scalingMatrix.xx * r.x + scalingMatrix.yx * r.y + scalingMatrix.zx * r.z;
+    r.y = scalingMatrix.yy * r.y + scalingMatrix.zy * r.z;
+    r.z = scalingMatrix.zz * r.z;
+
+    gm_x[globalIndex] = r;
+}
void UpdateConstrainCuda::Impl::integrate(GpuEventSynchronizer* fReadyOnDevice,
const real dt,
return;
}
+/*! \brief Scale coordinates on the GPU after a pressure coupling step.
+ *
+ * Packs the six used entries of the (lower-triangular) scaling matrix into
+ * the flat struct the kernel expects, launches the scaling kernel, and then
+ * blocks until the stream has finished.
+ *
+ * \param[in] scalingMatrix Coordinate scaling matrix.
+ */
+void UpdateConstrainCuda::Impl::scaleCoordinates(const matrix scalingMatrix)
+{
+    // Member order of ScalingMatrix: xx, yy, zz, yx, zx, zy.
+    ScalingMatrix mu = { scalingMatrix[XX][XX], scalingMatrix[YY][YY], scalingMatrix[ZZ][ZZ],
+                         scalingMatrix[YY][XX], scalingMatrix[ZZ][XX], scalingMatrix[ZZ][YY] };
+
+    const auto kernelArgs = prepareGpuKernelArguments(
+            scaleCoordinates_kernel, coordinateScalingKernelLaunchConfig_, &numAtoms_, &d_x_, &mu);
+    launchGpuKernel(scaleCoordinates_kernel, coordinateScalingKernelLaunchConfig_, nullptr,
+                    "scaleCoordinates_kernel", kernelArgs);
+    // TODO: Although this only happens on the pressure coupling steps, this synchronization
+    //       can affect the performance if nstpcouple is small.
+    gpuStreamSynchronize(commandStream_);
+}
+
UpdateConstrainCuda::Impl::Impl(const t_inputrec& ir,
const gmx_mtop_t& mtop,
const void* commandStream,
integrator_ = std::make_unique<LeapFrogCuda>(commandStream_);
lincsCuda_ = std::make_unique<LincsCuda>(ir.nLincsIter, ir.nProjOrder, commandStream_);
settleCuda_ = std::make_unique<SettleCuda>(mtop, commandStream_);
+
+ coordinateScalingKernelLaunchConfig_.blockSize[0] = c_threadsPerBlock;
+ coordinateScalingKernelLaunchConfig_.blockSize[1] = 1;
+ coordinateScalingKernelLaunchConfig_.blockSize[2] = 1;
+ coordinateScalingKernelLaunchConfig_.sharedMemorySize = 0;
+ coordinateScalingKernelLaunchConfig_.stream = commandStream_;
}
UpdateConstrainCuda::Impl::~Impl() {}
integrator_->set(md, numTempScaleValues, md.cTC);
lincsCuda_->set(idef, md);
settleCuda_->set(idef, md);
+
+ coordinateScalingKernelLaunchConfig_.gridSize[0] =
+ (numAtoms_ + c_threadsPerBlock - 1) / c_threadsPerBlock;
}
void UpdateConstrainCuda::Impl::setPbc(const t_pbc* pbc)
tcstat, doPressureCouple, dtPressureCouple, velocityScalingMatrix);
}
+// Public entry point: forwards coordinate scaling to the PIMPL implementation.
+void UpdateConstrainCuda::scaleCoordinates(const matrix scalingMatrix)
+{
+ impl_->scaleCoordinates(scalingMatrix);
+}
+
void UpdateConstrainCuda::set(DeviceBuffer<float> d_x,
DeviceBuffer<float> d_v,
const DeviceBuffer<float> d_f,
float dtPressureCouple,
const matrix velocityScalingMatrix);
+ /*! \brief Scale coordinates on the GPU for the pressure coupling.
+ *
+ * After a pressure coupling step, the box size may change. Hence, the coordinates should be
+ * scaled so that all the particles fit into the new box.
+ *
+ * \param[in] scalingMatrix Coordinate scaling matrix.
+ */
+ void scaleCoordinates(const matrix scalingMatrix);
+
/*! \brief Set the pointers and update data-structures (e.g. after NB search step).
*
* \param[in,out] d_x Device buffer with coordinates.
private:
//! CUDA stream
CommandStream commandStream_ = nullptr;
+ //! CUDA kernel launch config
+ KernelLaunchConfig coordinateScalingKernelLaunchConfig_;
//! Periodic boundary data
PbcAiuc pbcAiuc_;