Activate GPU update support in SYCL build
[alexxy/gromacs.git] / src / gromacs / mdlib / update_constrain_gpu_impl.cpp
index 919ba9b36b97a81b5f86e0a8200752be1956321f..9b8d102d2533889d8a305248b7d939f08ec58954 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * This file is part of the GROMACS molecular simulation package.
  *
- * Copyright (c) 2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  * and including many others, as listed in the AUTHORS file in the
  * top-level source directory and at http://www.gromacs.org.
  */
 /*! \internal \file
  *
- * \brief Stub for update and constraints class CPU implementation.
+ * \brief Implements the update and constraints class.
+ *
+ * The class combines the Leap-Frog integrator with the LINCS and SETTLE constraints.
+ *
+ * \todo The computational procedures of the members should be combined to improve
+ *       performance.
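+ *
+ * Typical use is to call set() with the device buffers and interaction definitions,
+ * setPbc() with the current box, and then integrate() to propagate the system by one
+ * step; scaleCoordinates() and scaleVelocities() apply an arbitrary scaling matrix to
+ * the coordinates and velocities, respectively.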
  *
  * \author Artem Zhmurov <zhmurov@gmail.com>
  *
  */
 #include "gmxpre.h"
 
-#include "config.h"
+#include "update_constrain_gpu_impl.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#include <cmath>
 
+#include <algorithm>
+
+#include "gromacs/gpu_utils/device_context.h"
+#include "gromacs/gpu_utils/device_stream.h"
+#include "gromacs/gpu_utils/devicebuffer.h"
+#include "gromacs/gpu_utils/gpueventsynchronizer.h"
+#include "gromacs/gpu_utils/gputraits.h"
+#include "gromacs/mdlib/leapfrog_gpu.h"
 #include "gromacs/mdlib/update_constrain_gpu.h"
+#include "gromacs/mdlib/update_constrain_gpu_internal.h"
+#include "gromacs/mdtypes/mdatom.h"
+#include "gromacs/timing/wallcycle.h"
+#include "gromacs/topology/mtop_util.h"
 
-#if GMX_GPU != GMX_GPU_CUDA
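+//! Whether this build provides GPU kernels for the LINCS and SETTLE constraints (currently the CUDA and SYCL builds).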
+static constexpr bool sc_haveGpuConstraintSupport = GMX_GPU_CUDA || GMX_GPU_SYCL;
 
 namespace gmx
 {
 
-class UpdateConstrainGpu::Impl
+void UpdateConstrainGpu::Impl::integrate(GpuEventSynchronizer*             fReadyOnDevice,
+                                         const real                        dt,
+                                         const bool                        updateVelocities,
+                                         const bool                        computeVirial,
+                                         tensor                            virial,
+                                         const bool                        doTemperatureScaling,
+                                         gmx::ArrayRef<const t_grp_tcstat> tcstat,
+                                         const bool                        doParrinelloRahman,
+                                         const float                       dtPressureCouple,
+                                         const matrix                      prVelocityScalingMatrix)
+{
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+
+    // Clear the virial matrix
+    // TODO There is no point in having a separate virial matrix for constraints
+    clear_mat(virial);
+
+    // Make sure that the forces are ready on device before proceeding with the update.
+    fReadyOnDevice->enqueueWaitEvent(deviceStream_);
+
+    // The integrator saves a copy of the current coordinates in d_xp_ and writes the updated
+    // ones into d_x_. The d_xp_ copy is only needed by the constraints.
+    integrator_->integrate(
+            d_x_, d_xp_, d_v_, d_f_, dt, doTemperatureScaling, tcstat, doParrinelloRahman, dtPressureCouple, prVelocityScalingMatrix);
+    // The constraints need the coordinates both before and after the update; once they are applied,
+    // the pre-update coordinates can be discarded. We therefore pass d_xp_ (the pre-update copy) and
+    // d_x_ (the updated coordinates) to the constraint kernels in swapped roles, so that the
+    // constrained coordinates land directly in d_x_ and a d_xp_ -> d_x_ copy after the constraints
+    // is avoided. Note that the integrator already stores the buffers in this swapped order.
+    if (sc_haveGpuConstraintSupport)
+    {
+        lincsGpu_->apply(d_xp_, d_x_, updateVelocities, d_v_, 1.0 / dt, computeVirial, virial, pbcAiuc_);
+        settleGpu_->apply(d_xp_, d_x_, updateVelocities, d_v_, 1.0 / dt, computeVirial, virial, pbcAiuc_);
+    }
+
+    // scaledVirial -> virial (the methods above return scaled values)
+    float scaleFactor = 0.5F / (dt * dt);
+    for (int i = 0; i < DIM; i++)
+    {
+        for (int j = 0; j < DIM; j++)
+        {
+            virial[i][j] = scaleFactor * virial[i][j];
+        }
+    }
+
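+    // Record the event in the stream so that consumers of the updated coordinates can synchronize on it.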
+    xUpdatedOnDeviceEvent_.markEvent(deviceStream_);
+
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
+}
+
+void UpdateConstrainGpu::Impl::scaleCoordinates(const matrix scalingMatrix)
+{
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+
+    ScalingMatrix mu(scalingMatrix);
+
+    launchScaleCoordinatesKernel(numAtoms_, d_x_, mu, deviceStream_);
+
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
+}
+
+void UpdateConstrainGpu::Impl::scaleVelocities(const matrix scalingMatrix)
 {
-};
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
 
-UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec& /* ir   */,
-                                       const gmx_mtop_t& /* mtop */,
-                                       const void* /* commandStream */,
-                                       GpuEventSynchronizer* /* xUpdatedOnDevice */) :
-    impl_(nullptr)
+    ScalingMatrix mu(scalingMatrix);
+
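+    // The coordinate-scaling kernel is reused here; it applies the scaling matrix to the velocities in d_v_.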
+    launchScaleCoordinatesKernel(numAtoms_, d_v_, mu, deviceStream_);
+
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
+}
+
+UpdateConstrainGpu::Impl::Impl(const t_inputrec&    ir,
+                               const gmx_mtop_t&    mtop,
+                               const int            numTempScaleValues,
+                               const DeviceContext& deviceContext,
+                               const DeviceStream&  deviceStream,
+                               gmx_wallcycle*       wcycle) :
+    deviceContext_(deviceContext), deviceStream_(deviceStream), wcycle_(wcycle)
+{
+    integrator_ = std::make_unique<LeapFrogGpu>(deviceContext_, deviceStream_, numTempScaleValues);
+    if (sc_haveGpuConstraintSupport)
+    {
+        lincsGpu_ = std::make_unique<LincsGpu>(ir.nLincsIter, ir.nProjOrder, deviceContext_, deviceStream_);
+        settleGpu_ = std::make_unique<SettleGpu>(mtop, deviceContext_, deviceStream_);
+    }
+}
+
+UpdateConstrainGpu::Impl::~Impl() {}
+
+void UpdateConstrainGpu::Impl::set(DeviceBuffer<Float3>          d_x,
+                                   DeviceBuffer<Float3>          d_v,
+                                   const DeviceBuffer<Float3>    d_f,
+                                   const InteractionDefinitions& idef,
+                                   const t_mdatoms&              md)
+{
+    wallcycle_start_nocount(wcycle_, WallCycleCounter::LaunchGpu);
+    wallcycle_sub_start(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+
+    GMX_ASSERT(d_x, "Coordinates device buffer should not be null.");
+    GMX_ASSERT(d_v, "Velocities device buffer should not be null.");
+    GMX_ASSERT(d_f, "Forces device buffer should not be null.");
+
+    d_x_ = d_x;
+    d_v_ = d_v;
+    d_f_ = d_f;
+
+    numAtoms_ = md.nr;
+
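+    // Ensure the device buffers for the intermediate coordinates (d_xp_) and the inverse masses
+    // are large enough for the current number of atoms; they are only reallocated when needed.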
+    reallocateDeviceBuffer(&d_xp_, numAtoms_, &numXp_, &numXpAlloc_, deviceContext_);
+
+    reallocateDeviceBuffer(
+            &d_inverseMasses_, numAtoms_, &numInverseMasses_, &numInverseMassesAlloc_, deviceContext_);
+
+    // The integrator should also update its internal state here, but it does not yet have a method for that
+    integrator_->set(numAtoms_, md.invmass, md.cTC);
+    if (sc_haveGpuConstraintSupport)
+    {
+        lincsGpu_->set(idef, numAtoms_, md.invmass);
+        settleGpu_->set(idef);
+    }
+    else
+    {
+        GMX_ASSERT(idef.il[F_SETTLE].empty(), "SETTLE not supported");
+        GMX_ASSERT(idef.il[F_CONSTR].empty(), "LINCS not supported");
+    }
+
+    wallcycle_sub_stop(wcycle_, WallCycleSubCounter::LaunchGpuUpdateConstrain);
+    wallcycle_stop(wcycle_, WallCycleCounter::LaunchGpu);
+}
+
+void UpdateConstrainGpu::Impl::setPbc(const PbcType pbcType, const matrix box)
+{
+    // TODO wallcycle
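+    // Pack the periodic boundary conditions into the PbcAiuc form that the constraint kernels use.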
+    setPbcAiuc(numPbcDimensions(pbcType), box, &pbcAiuc_);
+}
+
+GpuEventSynchronizer* UpdateConstrainGpu::Impl::xUpdatedOnDeviceEvent()
+{
+    return &xUpdatedOnDeviceEvent_;
+}
+
+UpdateConstrainGpu::UpdateConstrainGpu(const t_inputrec&    ir,
+                                       const gmx_mtop_t&    mtop,
+                                       const int            numTempScaleValues,
+                                       const DeviceContext& deviceContext,
+                                       const DeviceStream&  deviceStream,
+                                       gmx_wallcycle*       wcycle) :
+    impl_(new Impl(ir, mtop, numTempScaleValues, deviceContext, deviceStream, wcycle))
 {
-    GMX_ASSERT(false,
-               "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
 }
 
 UpdateConstrainGpu::~UpdateConstrainGpu() = default;
 
-void UpdateConstrainGpu::integrate(GpuEventSynchronizer* /* fReadyOnDevice */,
-                                   const real /* dt */,
-                                   const bool /* updateVelocities */,
-                                   const bool /* computeVirial */,
-                                   tensor /* virialScaled */,
-                                   const bool /* doTemperatureScaling */,
-                                   gmx::ArrayRef<const t_grp_tcstat> /* tcstat */,
-                                   const bool /* doParrinelloRahman */,
-                                   const float /* dtPressureCouple */,
-                                   const matrix /* prVelocityScalingMatrix*/)
+void UpdateConstrainGpu::integrate(GpuEventSynchronizer*             fReadyOnDevice,
+                                   const real                        dt,
+                                   const bool                        updateVelocities,
+                                   const bool                        computeVirial,
+                                   tensor                            virialScaled,
+                                   const bool                        doTemperatureScaling,
+                                   gmx::ArrayRef<const t_grp_tcstat> tcstat,
+                                   const bool                        doParrinelloRahman,
+                                   const float                       dtPressureCouple,
+                                   const matrix                      prVelocityScalingMatrix)
 {
-    GMX_ASSERT(false,
-               "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
+    impl_->integrate(fReadyOnDevice,
+                     dt,
+                     updateVelocities,
+                     computeVirial,
+                     virialScaled,
+                     doTemperatureScaling,
+                     tcstat,
+                     doParrinelloRahman,
+                     dtPressureCouple,
+                     prVelocityScalingMatrix);
 }
 
-void UpdateConstrainGpu::scaleCoordinates(const matrix /* scalingMatrix */)
+void UpdateConstrainGpu::scaleCoordinates(const matrix scalingMatrix)
 {
-    GMX_ASSERT(false,
-               "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
+    impl_->scaleCoordinates(scalingMatrix);
 }
 
-void UpdateConstrainGpu::set(DeviceBuffer<float> /* d_x */,
-                             DeviceBuffer<float> /* d_v */,
-                             const DeviceBuffer<float> /* d_f */,
-                             const t_idef& /* idef */,
-                             const t_mdatoms& /* md */,
-                             const int /* numTempScaleValues */)
+void UpdateConstrainGpu::scaleVelocities(const matrix scalingMatrix)
 {
-    GMX_ASSERT(false,
-               "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
+    impl_->scaleVelocities(scalingMatrix);
 }
 
-void UpdateConstrainGpu::setPbc(const PbcType /* pbcType */, const matrix /* box */)
+void UpdateConstrainGpu::set(DeviceBuffer<Float3>          d_x,
+                             DeviceBuffer<Float3>          d_v,
+                             const DeviceBuffer<Float3>    d_f,
+                             const InteractionDefinitions& idef,
+                             const t_mdatoms&              md)
 {
-    GMX_ASSERT(false,
-               "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
+    impl_->set(d_x, d_v, d_f, idef, md);
 }
 
-GpuEventSynchronizer* UpdateConstrainGpu::getCoordinatesReadySync()
+void UpdateConstrainGpu::setPbc(const PbcType pbcType, const matrix box)
 {
-    GMX_ASSERT(false,
-               "A CPU stub for UpdateConstrain was called instead of the correct implementation.");
-    return nullptr;
+    impl_->setPbc(pbcType, box);
 }
 
-bool UpdateConstrainGpu::isNumCoupledConstraintsSupported(const gmx_mtop_t& /* mtop */)
+GpuEventSynchronizer* UpdateConstrainGpu::xUpdatedOnDeviceEvent()
 {
-    return false;
+    return impl_->xUpdatedOnDeviceEvent();
 }
 
-} // namespace gmx
+bool UpdateConstrainGpu::isNumCoupledConstraintsSupported(const gmx_mtop_t& mtop)
+{
+    return LincsGpu::isNumCoupledConstraintsSupported(mtop);
+}
+
+bool UpdateConstrainGpu::areConstraintsSupported()
+{
+    return sc_haveGpuConstraintSupport;
+}
 
-#endif /* GMX_GPU != GMX_GPU_CUDA */
+} // namespace gmx