Use host buffer instead of host accessors in SYCL LeapFrog

author Andrey Alekseenko <al42and@gmail.com>

Wed, 14 Apr 2021 17:03:23 +0000 (17:03 +0000)

committer Artem Zhmurov <zhmurov@gmail.com>

Wed, 14 Apr 2021 17:03:23 +0000 (17:03 +0000)
author Andrey Alekseenko <al42and@gmail.com>
Wed, 14 Apr 2021 17:03:23 +0000 (17:03 +0000)
committer Artem Zhmurov <zhmurov@gmail.com>
Wed, 14 Apr 2021 17:03:23 +0000 (17:03 +0000)
diff --git a/src/gromacs/mdlib/leapfrog_gpu.h b/src/gromacs/mdlib/leapfrog_gpu.h

index 738437aa0dd908be4107bc509c4efc4c8359ffe7..9102e2a27e9a81ea5ec7c954761c2c149caa8dcd 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu.h
+++ b/src/gromacs/mdlib/leapfrog_gpu.h
@@ -168,7 +168,6 @@ private:
      int numTempScaleValues_ = 0;
      /*! \brief Array with temperature scaling factors.
       * This is temporary solution to remap data from t_grp_tcstat into plain array.
-     * Not used in SYCL.
       * \todo Replace with better solution.
       */
      gmx::HostVector<float> h_lambdas_;
diff --git a/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp b/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp

index b0e2583a3f33a5dc16b267db2e3c7d43fcc2f021..89997a69ca7c643b37293ae0cf6bfe45fd45b905 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp
+++ b/src/gromacs/mdlib/leapfrog_gpu_sycl.cpp
@@ -232,13 +232,25 @@ void LeapFrogGpu::integrate(DeviceBuffer<Float3>              d_x,
          GMX_ASSERT(checkDeviceBuffer(d_lambdas_, numTempScaleValues_),
                     "Number of temperature scaling factors changed since it was set for the "
                     "last time.");
-        { // Explicitly limiting the scope of host accessor. Not strictly necessary here.
-            auto ha_lambdas_ = d_lambdas_.buffer_->get_access<mode::discard_write>();
-            for (int i = 0; i < numTempScaleValues_; i++)
-            {
-                ha_lambdas_[i] = tcstat[i].lambda;
-            }
+        GMX_RELEASE_ASSERT(gmx::ssize(h_lambdas_) == numTempScaleValues_,
+                           "Number of temperature scaling factors changed since it was set for the "
+                           "last time.");
+        /* We could use host accessors here, without h_lambdas_.
+         * According to a quick test, host accessor is slightly faster when using DPC++ and
+         * LevelZero compared to using h_lambdas_ + cgh.copy. But with DPC++ and OpenCL, the host
+         * accessor waits for fReadyOnDevice in UpdateConstrainGpu::Impl::integrate. See #4023. */
+
+        for (int i = 0; i < numTempScaleValues_; i++)
+        {
+            h_lambdas_[i] = tcstat[i].lambda;
          }
+        copyToDeviceBuffer(&d_lambdas_,
+                           h_lambdas_.data(),
+                           0,
+                           numTempScaleValues_,
+                           deviceStream_,
+                           GpuApiCallBehavior::Async,
+                           nullptr);
      }
      NumTempScaleValues tempVelocityScalingType =
              getTempScalingType(doTemperatureScaling, numTempScaleValues_);
@@ -284,6 +296,7 @@ LeapFrogGpu::LeapFrogGpu(const DeviceContext& deviceContext,
      // If the temperature coupling is enabled, we need to make space for scaling factors
      if (numTempScaleValues_ > 0)
      {
+        h_lambdas_.resize(numTempScaleValues_);
          reallocateDeviceBuffer(
                  &d_lambdas_, numTempScaleValues_, &numLambdas_, &numLambdasAlloc_, deviceContext_);
      }
author	Andrey Alekseenko <al42and@gmail.com>
	Wed, 14 Apr 2021 17:03:23 +0000 (17:03 +0000)
committer	Artem Zhmurov <zhmurov@gmail.com>
	Wed, 14 Apr 2021 17:03:23 +0000 (17:03 +0000)
src/gromacs/mdlib/leapfrog_gpu.h		patch | blob | history
src/gromacs/mdlib/leapfrog_gpu_sycl.cpp		patch | blob | history