SYCL: Shorten mangled kernel name types

author Andrey Alekseenko <al42and@gmail.com>

Fri, 11 Jun 2021 22:10:54 +0000 (01:10 +0300)

committer Andrey Alekseenko <al42and@gmail.com>

Fri, 11 Jun 2021 22:10:54 +0000 (01:10 +0300)
author Andrey Alekseenko <al42and@gmail.com>
Fri, 11 Jun 2021 22:10:54 +0000 (01:10 +0300)
committer Andrey Alekseenko <al42and@gmail.com>
Fri, 11 Jun 2021 22:10:54 +0000 (01:10 +0300)
diff --git a/src/gromacs/hardware/device_management_sycl.cpp b/src/gromacs/hardware/device_management_sycl.cpp

index 8923fe3e403fa0c4c2bcc74a54344c4fbdd9fdee..3b687f3660e7f382c654393aefb258737c7bc937 100644 (file)
--- a/src/gromacs/hardware/device_management_sycl.cpp
+++ b/src/gromacs/hardware/device_management_sycl.cpp
@@ -145,6 +145,9 @@ static DeviceStatus isDeviceCompatible(const cl::sycl::device& syclDevice)
      }
  }
  
+// Declaring the class here to avoid long unreadable name in the profiler report
+//! \brief Class name for test kernel
+class DummyKernel;
  
  /*!
   * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
@@ -168,7 +171,7 @@ static bool isDeviceFunctional(const cl::sycl::device& syclDevice, std::string*
          queue.submit([&](cl::sycl::handler& cgh) {
                   auto d_buffer = buffer.get_access<cl::sycl::access::mode::discard_write>(cgh);
                   cl::sycl::range<1> range{ numThreads };
-                 cgh.parallel_for<class DummyKernel>(range, [=](cl::sycl::id<1> threadId) {
+                 cgh.parallel_for<DummyKernel>(range, [=](cl::sycl::id<1> threadId) {
                       d_buffer[threadId] = threadId.get(0);
                   });
               }).wait_and_throw();
diff --git a/src/gromacs/mdlib/gpuforcereduction_impl_internal_sycl.cpp b/src/gromacs/mdlib/gpuforcereduction_impl_internal_sycl.cpp

index 3bc21c43de8ccfd860890d8f383aad18d0ec0397..e2ee7b46979d1bd12dccf8600faeed8f19553e51 100644 (file)
--- a/src/gromacs/mdlib/gpuforcereduction_impl_internal_sycl.cpp
+++ b/src/gromacs/mdlib/gpuforcereduction_impl_internal_sycl.cpp
@@ -54,6 +54,10 @@
  #include "gromacs/gpu_utils/gpueventsynchronizer_sycl.h"
  #include "gromacs/utility/template_mp.h"
  
+//! \brief Class name for reduction kernel
+template<bool addRvecForce, bool accumulateForce>
+class ReduceKernel;
+
  namespace gmx
  {
  
@@ -93,9 +97,6 @@ static auto reduceKernel(cl::sycl::handler&                                 cgh,
      };
  }
  
-template<bool addRvecForce, bool accumulateForce>
-class ReduceKernelName;
-
  template<bool addRvecForce, bool accumulateForce>
  static void launchReductionKernel_(const int                   numAtoms,
                                     const int                   atomStart,
@@ -114,7 +115,7 @@ static void launchReductionKernel_(const int                   numAtoms,
      queue.submit([&](cl::sycl::handler& cgh) {
          auto kernel = reduceKernel<addRvecForce, accumulateForce>(
                  cgh, b_nbnxmForce, b_rvecForceToAdd, b_forceTotal, b_cell, atomStart);
-        cgh.parallel_for<ReduceKernelName<addRvecForce, accumulateForce>>(rangeNumAtoms, kernel);
+        cgh.parallel_for<ReduceKernel<addRvecForce, accumulateForce>>(rangeNumAtoms, kernel);
      });
  }
  
diff --git a/src/gromacs/mdlib/leapfrog_gpu_internal_sycl.cpp b/src/gromacs/mdlib/leapfrog_gpu_internal_sycl.cpp

index 566c92b1b45e0d8512f222dc74de5df5a82218e8..d0d019c25ebe6faee2de372cb4c1322bcee26177 100644 (file)
--- a/src/gromacs/mdlib/leapfrog_gpu_internal_sycl.cpp
+++ b/src/gromacs/mdlib/leapfrog_gpu_internal_sycl.cpp
@@ -56,6 +56,10 @@
  #include "gromacs/utility/fatalerror.h"
  #include "gromacs/utility/template_mp.h"
  
+//! \brief Class name for leap-frog kernel
+template<gmx::NumTempScaleValues numTempScaleValues, gmx::VelocityScalingType velocityScaling>
+class LeapFrogKernel;
+
  namespace gmx
  {
  
@@ -156,15 +160,11 @@ auto leapFrogKernel(
      };
  }
  
-// SYCL 1.2.1 requires providing a unique type for a kernel. Should not be needed for SYCL2020.
-template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
-class LeapFrogKernelName;
-
  template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling, class... Args>
  static cl::sycl::event launchLeapFrogKernel(const DeviceStream& deviceStream, int numAtoms, Args&&... args)
  {
      // Should not be needed for SYCL2020.
-    using kernelNameType = LeapFrogKernelName<numTempScaleValues, velocityScaling>;
+    using kernelNameType = LeapFrogKernel<numTempScaleValues, velocityScaling>;
  
      const cl::sycl::range<1> rangeAllAtoms(numAtoms);
      cl::sycl::queue          q = deviceStream.stream();
diff --git a/src/gromacs/mdlib/update_constrain_gpu_internal_sycl.cpp b/src/gromacs/mdlib/update_constrain_gpu_internal_sycl.cpp

index 5f2233a7f195b640d7bf3d3647c120a9b6ddb5e8..1843b2f75b871d127bb1ade517e1bdaaa2ec2aae 100644 (file)
--- a/src/gromacs/mdlib/update_constrain_gpu_internal_sycl.cpp
+++ b/src/gromacs/mdlib/update_constrain_gpu_internal_sycl.cpp
@@ -49,6 +49,9 @@
  #include "gromacs/gpu_utils/gputraits_sycl.h"
  #include "gromacs/utility/gmxassert.h"
  
+//! \brief Class name for scaling kernel
+class ScaleKernel;
+
  namespace gmx
  {
  
@@ -77,7 +80,7 @@ void launchScaleCoordinatesKernel(const int            numAtoms,
  
      cl::sycl::event e = queue.submit([&](cl::sycl::handler& cgh) {
          auto kernel = scaleKernel(cgh, d_coordinates, mu);
-        cgh.parallel_for<class ScaleKernelName>(rangeAllAtoms, kernel);
+        cgh.parallel_for<ScaleKernel>(rangeAllAtoms, kernel);
      });
      // TODO: Although this only happens on the pressure coupling steps, this synchronization
      //       can affect the performance if nstpcouple is small. See Issue #4018
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp

index 664540f8571b616b47c2cec588198714426185b6..1a37f49dfff3969b17e9d9046152fd11518066e0 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp
@@ -53,6 +53,10 @@
  #include "nbnxm_sycl_kernel_utils.h"
  #include "nbnxm_sycl_types.h"
  
+//! \brief Class name for NBNXM kernel
+template<bool doPruneNBL, bool doCalcEnergies, enum Nbnxm::ElecType elecType, enum Nbnxm::VdwType vdwType>
+class NbnxmKernel;
+
  namespace Nbnxm
  {
  
@@ -997,15 +1001,10 @@ auto nbnxmKernel(cl::sycl::handler&                                   cgh,
      };
  }
  
-// SYCL 1.2.1 requires providing a unique type for a kernel. Should not be needed for SYCL2020.
-template<bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType>
-class NbnxmKernelName;
-
  template<bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType, class... Args>
  cl::sycl::event launchNbnxmKernel(const DeviceStream& deviceStream, const int numSci, Args&&... args)
  {
-    // Should not be needed for SYCL2020.
-    using kernelNameType = NbnxmKernelName<doPruneNBL, doCalcEnergies, elecType, vdwType>;
+    using kernelNameType = NbnxmKernel<doPruneNBL, doCalcEnergies, elecType, vdwType>;
  
      /* Kernel launch config:
       * - The thread block dimensions match the size of i-clusters, j-clusters,
diff --git a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp

index a2cc1f8a0ddc41f140ad364ff6e0fc5c1b5f52d2..7770915c4d84a6ac0864f3b25cb5b54de39bca79 100644 (file)
--- a/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp
+++ b/src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp
@@ -54,6 +54,10 @@ using cl::sycl::access::fence_space;
  using cl::sycl::access::mode;
  using cl::sycl::access::target;
  
+//! \brief Class name for NBNXM prune-only kernel
+template<bool haveFreshList>
+class NbnxmKernelPruneOnly;
+
  namespace Nbnxm
  {
  
@@ -216,17 +220,12 @@ auto nbnxmKernelPruneOnly(cl::sycl::handler&                            cgh,
      };
  }
  
-// SYCL 1.2.1 requires providing a unique type for a kernel. Should not be needed for SYCL2020.
-template<bool haveFreshList>
-class NbnxmKernelPruneOnlyName;
-
  template<bool haveFreshList, class... Args>
  cl::sycl::event launchNbnxmKernelPruneOnly(const DeviceStream& deviceStream,
                                             const int           numSciInPart,
                                             Args&&... args)
  {
-    // Should not be needed for SYCL2020.
-    using kernelNameType = NbnxmKernelPruneOnlyName<haveFreshList>;
+    using kernelNameType = NbnxmKernelPruneOnly<haveFreshList>;
  
      /* Kernel launch config:
       * - The thread block dimensions match the size of i-clusters, j-clusters,
author	Andrey Alekseenko <al42and@gmail.com>
	Fri, 11 Jun 2021 22:10:54 +0000 (01:10 +0300)
committer	Andrey Alekseenko <al42and@gmail.com>
	Fri, 11 Jun 2021 22:10:54 +0000 (01:10 +0300)
src/gromacs/hardware/device_management_sycl.cpp		patch \| blob \| history
src/gromacs/mdlib/gpuforcereduction_impl_internal_sycl.cpp		patch \| blob \| history
src/gromacs/mdlib/leapfrog_gpu_internal_sycl.cpp		patch \| blob \| history
src/gromacs/mdlib/update_constrain_gpu_internal_sycl.cpp		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel.cpp		patch \| blob \| history
src/gromacs/nbnxm/sycl/nbnxm_sycl_kernel_pruneonly.cpp		patch \| blob \| history