Sometimes it was enough to convert existing comments to Doxygen.
Now there are no SYCL-related warnings when building the webpage.
Also fixed a few trivial clang-tidy complaints.
/*
* This file is part of the GROMACS molecular simulation package.
*
- * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
}
}
+// Doxygen does not like recursive templates.
+//! \cond
template<std::size_t N, class F, typename std::enable_if<(N > 1)>::type* = nullptr>
static auto mp_with_index(std::size_t i, F&& f)
{
return mp_with_index<N - 1>(i, std::forward<F>(f));
}
}
-
+//! \endcond
} // namespace compat
} // namespace gmx
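For reference, a minimal self-contained sketch of how such a shim is used, assuming it follows Boost.Mp11's mp_with_index semantics (the N == 1 base case and the i == N - 1 branch are elided in the hunks above):

#include <cstddef>
#include <cstdio>
#include <type_traits>
#include <utility>

// Illustrative stand-alone version, not the GROMACS compat implementation.
template<std::size_t N, class F, typename std::enable_if<(N == 1)>::type* = nullptr>
static auto mp_with_index(std::size_t /*i*/, F&& f)
{
    return std::forward<F>(f)(std::integral_constant<std::size_t, 0>());
}

template<std::size_t N, class F, typename std::enable_if<(N > 1)>::type* = nullptr>
static auto mp_with_index(std::size_t i, F&& f)
{
    if (i == N - 1)
    {
        return std::forward<F>(f)(std::integral_constant<std::size_t, N - 1>());
    }
    return mp_with_index<N - 1>(i, std::forward<F>(f));
}

int main()
{
    const std::size_t runtimeIndex = 2; // known only at run time
    mp_with_index<4>(runtimeIndex, [](auto idx) {
        constexpr std::size_t value = decltype(idx)::value; // compile-time constant
        std::printf("dispatched to compile-time index %zu\n", value);
    });
    return 0;
}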
namespace gmx
{
+//! Reinterpret-cast any pointer \p in to \c Float3, checking the type compatibility.
template<typename T>
static inline Float3* asGenericFloat3Pointer(T* in)
{
return reinterpret_cast<Float3*>(in);
}
+//! Reinterpret-cast any const pointer \p in to \c Float3, checking the type compatibility.
template<typename T>
static inline const Float3* asGenericFloat3Pointer(const T* in)
{
return reinterpret_cast<const Float3*>(in);
}
+//! Reinterpret-cast any container \p in to \c Float3, checking the type compatibility.
template<typename C>
static inline Float3* asGenericFloat3Pointer(C& in)
{
return reinterpret_cast<Float3*>(in.data());
}
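The type-compatibility check these comments promise is elided in the hunks above; a plausible self-contained sketch of such a guard (the size-based static_assert and the RVecLike stand-in are assumptions, not GROMACS code):

#include <cstdio>
#include <vector>

struct Float3 // stand-in for the type from gromacs/math/vectypes.h
{
    float x, y, z;
};

template<typename T>
static inline Float3* asGenericFloat3Pointer(T* in)
{
    // The compatibility check: reject types whose layout cannot alias Float3.
    static_assert(sizeof(T) == sizeof(Float3),
                  "Type size must match Float3 for a generic-pointer cast");
    return reinterpret_cast<Float3*>(in);
}

int main()
{
    struct RVecLike // stand-in for a 3-float host vector type
    {
        float v[3];
    };
    std::vector<RVecLike> coords(4, RVecLike{ { 1.0F, 2.0F, 3.0F } });
    Float3* view = asGenericFloat3Pointer(coords.data());
    std::printf("%g %g %g\n", view[0].x, view[0].y, view[0].z);
    return 0;
}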
+//! Reinterpret-cast any const container \p in to \c Float3, checking the type compatibility.
template<typename C>
static inline const Float3* asGenericFloat3Pointer(const C& in)
{
#include "gromacs/gpu_utils/gmxsycl.h"
#include "gromacs/math/vectypes.h"
+//! Type of device texture object. In SYCL, that would be \c sycl::image, but it's not used.
using DeviceTexture = void*;
//! \brief Single GPU call timing event, not used with SYCL
* This file is part of the GROMACS molecular simulation package.
*
* Copyright (c) 2012,2013,2014,2015,2016 by the GROMACS development team.
- * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
+ * Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by
* Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
* and including many others, as listed in the AUTHORS file in the
* top-level source directory and at http://www.gromacs.org.
compilerOptions += " -cl-opt-disable";
}
- /* Fastmath imprves performance on all supported arch */
+ /* Fastmath improves performance on all supported arch */
if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
{
compilerOptions += " -cl-fast-relaxed-math";
static constexpr auto mode_atomic = GMX_SYCL_DPCPP ? cl::sycl::access::mode::read_write :
/* GMX_SYCL_HIPSYCL */ cl::sycl::access::mode::atomic;
-// \brief Full warp active thread mask used in CUDA warp-level primitives.
+//! \brief Full warp active thread mask used in CUDA warp-level primitives.
static constexpr unsigned int c_cudaFullWarpMask = 0xffffffff;
/*! \brief Convenience wrapper to do atomic addition to a global buffer.
#endif
}
-/* \brief Issue an intra sub-group barrier.
+/*! \brief Issue an intra sub-group barrier.
*
- * Equivalent with CUDA syncwarp(c_cudaFullWarpMask).
+ * Equivalent to CUDA's \c __syncwarp(c_cudaFullWarpMask).
*
*/
static inline void subGroupBarrier(const cl::sycl::nd_item<1> itemIdx)
//! Constant used to help minimize preprocessed code
static constexpr bool c_binarySupportsGpus = (GMX_GPU != 0);
+//! Whether \ref DeviceInformation can be serialized for sending via MPI.
static constexpr bool c_canSerializeDeviceInformation =
(!GMX_GPU_OPENCL && !GMX_GPU_SYCL); /*NOLINT(misc-redundant-expression)*/
using cl::sycl::access::mode;
+//! \brief Function returning the force reduction kernel lambda.
template<bool addRvecForce, bool accumulateForce>
static auto reduceKernel(cl::sycl::handler& cgh,
DeviceAccessor<Float3, mode::read> a_nbnxmForce,
};
}
+//! \brief Force reduction SYCL kernel launch code.
template<bool addRvecForce, bool accumulateForce>
static void launchReductionKernel_(const int numAtoms,
const int atomStart,
});
}
-/*! \brief Select templated kernel and launch it. */
+/*! \brief Select templated force reduction kernel and launch it. */
void launchForceReductionKernel(int numAtoms,
int atomStart,
bool addRvecForce,
copyToDeviceBuffer(
&d_inverseMasses_, inverseMasses, 0, numAtoms_, deviceStream_, GpuApiCallBehavior::Sync, nullptr);
- // Temperature scale group map only used if there are more then one group
+ // Temperature scale group map is only used if there is more than one group
if (numTempScaleValues_ > 1)
{
reallocateDeviceBuffer(
namespace gmx
{
+/*! \brief Backend-specific function to launch GPU Leap Frog kernel.
+ *
+ * \param numAtoms Total number of atoms.
+ * \param[in,out] d_x Buffer containing initial coordinates, and where the updated ones will be written.
+ * \param[out] d_xp Buffer where a copy of the initial coordinates will be written.
+ * \param[in,out] d_v Buffer containing initial velocities, and where the updated ones will be written.
+ * \param[in] d_f Buffer containing forces.
+ * \param[in] d_inverseMasses Buffer containing atoms' reciprocal masses.
+ * \param dt Timestep.
+ * \param doTemperatureScaling Whether temperature scaling is needed.
+ * \param numTempScaleValues Number of different T-couple values.
+ * \param d_tempScaleGroups Mapping of atoms into temperature scaling groups.
+ * \param d_lambdas Temperature scaling factors (one per group).
+ * \param prVelocityScalingType Type of Parrinello-Rahman velocity rescaling.
+ * \param prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix.
+ * \param deviceStream Device stream for kernel launch.
+ */
void launchLeapFrogKernel(int numAtoms,
DeviceBuffer<Float3> d_x,
DeviceBuffer<Float3> d_xp,
* \param[in] dt Timestep.
* \param[in] a_lambdas Temperature scaling factors (one per group).
* \param[in] a_tempScaleGroups Mapping of atoms into groups.
- * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix
+ * \param[in] prVelocityScalingMatrixDiagonal Diagonal elements of Parrinello-Rahman velocity scaling matrix.
*/
template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling>
auto leapFrogKernel(
};
}
+//! \brief Leap Frog SYCL kernel launch code.
template<NumTempScaleValues numTempScaleValues, VelocityScalingType velocityScaling, class... Args>
static cl::sycl::event launchLeapFrogKernel(const DeviceStream& deviceStream, int numAtoms, Args&&... args)
{
return e;
}
+//! Convert \p doTemperatureScaling and \p numTempScaleValues to \ref NumTempScaleValues.
static NumTempScaleValues getTempScalingType(bool doTemperatureScaling, int numTempScaleValues)
{
if (!doTemperatureScaling)
//! Number of threads in a GPU block
constexpr static int c_threadsPerBlock = 256;
+/*! \brief Backend-specific function to launch LINCS kernel.
+ *
+ * \param kernelParams LINCS parameters.
+ * \param d_x Initial coordinates before the integration.
+ * \param d_xp Coordinates after the integration, which will be updated in place.
+ * \param updateVelocities Whether to also update velocities.
+ * \param d_v Velocities to update (ignored if \p updateVelocities is \c false).
+ * \param invdt Reciprocal of timestep.
+ * \param computeVirial Whether to compute the virial.
+ * \param deviceStream Device stream for kernel launch.
+ */
void launchLincsGpuKernel(const LincsGpuKernelParameters& kernelParams,
const DeviceBuffer<Float3>& d_x,
DeviceBuffer<Float3> d_xp,
//! Number of work-items in a work-group
constexpr static int sc_workGroupSize = 256;
+//! \brief Function returning the SETTLE kernel lambda.
template<bool updateVelocities, bool computeVirial>
auto settleKernel(cl::sycl::handler& cgh,
const int numSettles,
template<bool updateVelocities, bool computeVirial>
class SettleKernelName;
+//! \brief SETTLE SYCL kernel launch code.
template<bool updateVelocities, bool computeVirial, class... Args>
static cl::sycl::event launchSettleKernel(const DeviceStream& deviceStream, int numSettles, Args&&... args)
{
namespace gmx
{
+//! \brief Function returning the scaling kernel lambda.
static auto scaleKernel(cl::sycl::handler& cgh,
DeviceAccessor<Float3, cl::sycl::access::mode::read_write> a_x,
const ScalingMatrix scalingMatrix)
* Currently the random seeds for SD and BD are missing.
*/
-/* \brief Enum for all entries in \p t_state
+/*! \brief Enum for all entries in \p t_state
*
* These enums are used in flags as (1<<est...).
* The order of these enums should not be changed,
return static_cast<real>(numAtoms) / (size[XX] * size[YY] * size[ZZ]);
}
-// Get approximate dimensions of each cell. Returns the length along X and Y.
+//! \brief Get approximate dimensions of each cell. Returns the length along X and Y.
static std::array<real, DIM - 1> getTargetCellLength(const Grid::Geometry& geometry, const real atomDensity)
{
if (geometry.isSimple)
static constexpr bool elecEwaldTab =
(elecType == ElecType::EwaldTab || elecType == ElecType::EwaldTabTwin); ///< EL_EWALD_TAB
static constexpr bool elecEwaldTwin =
- (elecType == ElecType::EwaldAnaTwin || elecType == ElecType::EwaldTabTwin);
- static constexpr bool elecEwald = (elecEwaldAna || elecEwaldTab); ///< EL_EWALD_ANY
- static constexpr bool vdwCombLB = (vdwType == VdwType::CutCombLB);
+ (elecType == ElecType::EwaldAnaTwin || elecType == ElecType::EwaldTabTwin); ///< Use twin cut-off.
+ static constexpr bool elecEwald = (elecEwaldAna || elecEwaldTab); ///< EL_EWALD_ANY
+ static constexpr bool vdwCombLB = (vdwType == VdwType::CutCombLB); ///< LJ_COMB && !LJ_COMB_GEOM
static constexpr bool vdwCombGeom = (vdwType == VdwType::CutCombGeom); ///< LJ_COMB_GEOM
static constexpr bool vdwComb = (vdwCombLB || vdwCombGeom); ///< LJ_COMB
static constexpr bool vdwEwaldCombGeom = (vdwType == VdwType::EwaldGeom); ///< LJ_EWALD_COMB_GEOM
template<enum VdwType vdwType>
constexpr bool ljComb = EnergyFunctionProperties<ElecType::Count, vdwType>().vdwComb;
-template<enum ElecType elecType> // Yes, ElecType
-constexpr bool vdwCutoffCheck = EnergyFunctionProperties<elecType, VdwType::Count>().elecEwaldTwin;
-
template<enum ElecType elecType>
constexpr bool elecEwald = EnergyFunctionProperties<elecType, VdwType::Count>().elecEwald;
using cl::sycl::access::mode;
using cl::sycl::access::target;
+//! \brief Convert \p sigma and \p epsilon VdW parameters to \c c6,c12 pair.
static inline Float2 convertSigmaEpsilonToC6C12(const float sigma, const float epsilon)
{
const float sigma2 = sigma * sigma;
const float c6 = epsilon * sigma6;
const float c12 = c6 * sigma6;
- return Float2(c6, c12);
+ return { c6, c12 };
}
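The algebra above inverts cleanly regardless of the prefactor convention carried by epsilon; a minimal self-contained sanity sketch:

#include <cassert>
#include <cmath>

int main()
{
    const float sigma  = 0.3F, epsilon = 2.0F;
    const float sigma2 = sigma * sigma;
    const float sigma6 = sigma2 * sigma2 * sigma2;
    const float c6     = epsilon * sigma6;
    const float c12    = c6 * sigma6;
    // With c6 = eps*sigma^6 and c12 = c6*sigma^6, the inputs are recoverable:
    assert(std::fabs(c12 / c6 - sigma6) < 1e-6F * sigma6);
    assert(std::fabs(c6 * c6 / c12 - epsilon) < 1e-5F * epsilon);
    return 0;
}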
+//! \brief Calculate force and energy for a pair of atoms, VdW force-switch flavor.
template<bool doCalcEnergies>
static inline void ljForceSwitch(const shift_consts_t dispersionShift,
const shift_consts_t repulsionShift,
}
}
-// This function also requires sm_buf to have a length of at least 1.
-// The function returns:
-// - for thread #0 in the group: sum of all valueToReduce in a group
-// - for other threads: unspecified
+/*!
+ * \brief Do workgroup-level reduction of a single \c float.
+ *
+ * While SYCL has \c sycl::reduce_over_group, it currently (oneAPI 2021.3.0) uses a very large
+ * shared memory buffer, which leads to reduced occupancy.
+ *
+ * \tparam subGroupSize Size of a sub-group.
+ * \tparam groupSize Size of a work-group.
+ * \param itemIdx Current thread's \c sycl::nd_item.
+ * \param tidxi Current thread's linearized local index.
+ * \param sm_buf Accessor for the local reduction buffer. Must have a length of at least 1.
+ * \param valueToReduce Current thread's value.
+ * \return For the thread with \p tidxi == 0: the sum of all \p valueToReduce values. For other threads: unspecified.
+ */
template<int subGroupSize, int groupSize>
static inline float groupReduce(const cl::sycl::nd_item<1> itemIdx,
const unsigned int tidxi,
/*! \brief Final i-force reduction.
*
- * Reduce c_nbnxnGpuNumClusterPerSupercluster i-force componets stored in \p fCiBuf[]
+ * Reduce c_nbnxnGpuNumClusterPerSupercluster i-force components stored in \p fCiBuf[]
* accumulating atomically into \p a_f.
* If \p calcFShift is true, further reduce shift forces and atomically accumulate into \p a_fShift.
*
(props.elecEwald || props.elecRF || props.vdwEwald || (props.elecCutoff && doCalcEnergies));
// The post-prune j-i cluster-pair organization is linked to how exclusion and interaction mask data is stored.
- // Currently this is ideally suited for 32-wide subgroup size but slightly less so for others,
+ // Currently, this is ideally suited for 32-wide subgroup size but slightly less so for others,
// e.g. subGroupSize > prunedClusterPairSize on AMD GCN / CDNA.
// Hence, the two are decoupled.
// When changing this code, please update requiredSubGroupSizeForNbnxm in src/gromacs/hardware/device_management_sycl.cpp.
if constexpr (props.elecRF)
{
energyElec +=
- qi * qj * (pairExclMask * rInv + 0.5f * twoKRf * r2 - cRF);
+ qi * qj * (pairExclMask * rInv + 0.5F * twoKRf * r2 - cRF);
}
if constexpr (props.elecEwald)
{
} // for (int j4 = cij4Start; j4 < cij4End; j4 += 1)
/* skip central shifts when summing shift forces */
- const bool doCalcShift = (calcShift && !(nbSci.shift == gmx::c_centralShiftIndex));
+ const bool doCalcShift = (calcShift && nbSci.shift != gmx::c_centralShiftIndex);
reduceForceIAndFShift(
sm_reductionBuffer, fCiBuf, doCalcShift, itemIdx, tidxi, tidxj, sci, nbSci.shift, a_f, a_fShift);
};
}
+//! \brief NBNXM kernel launch code.
template<bool doPruneNBL, bool doCalcEnergies, enum ElecType elecType, enum VdwType vdwType, class... Args>
cl::sycl::event launchNbnxmKernel(const DeviceStream& deviceStream, const int numSci, Args&&... args)
{
return e;
}
+//! \brief Select templated kernel and launch it.
template<class... Args>
cl::sycl::event chooseAndLaunchNbnxmKernel(bool doPruneNBL,
bool doCalcEnergies,
{
using gmx::InteractionLocality;
+/*! \brief Launch SYCL NBNXM kernel.
+ *
+ * \param nb Non-bonded parameters.
+ * \param stepWork Workload flags for the current step.
+ * \param iloc Interaction locality.
+ */
void launchNbnxmKernel(NbnxmGpu* nb, const gmx::StepWorkload& stepWork, const InteractionLocality iloc);
} // namespace Nbnxm
};
}
+//! \brief NBNXM SYCL prune-only kernel launch code.
template<bool haveFreshList, class... Args>
cl::sycl::event launchNbnxmKernelPruneOnly(const DeviceStream& deviceStream,
const int numSciInPart,
return e;
}
+//! \brief Select templated kernel and launch it.
template<class... Args>
cl::sycl::event chooseAndLaunchNbnxmKernelPruneOnly(bool haveFreshList, Args&&... args)
{
{
using gmx::InteractionLocality;
+/*! \brief Launch SYCL NBNXM prune-only kernel.
+ *
+ * \param nb Non-bonded parameters.
+ * \param iloc Interaction locality.
+ * \param numParts Total number of rolling-prune parts.
+ * \param part Index of the part to prune.
+ * \param numSciInPart Number of superclusters in \p part.
+ */
void launchNbnxmKernelPruneOnly(NbnxmGpu* nb,
const InteractionLocality iloc,
const int numParts,
{
#ifndef GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY
+//! \brief Default for the prune kernel's j4 processing concurrency.
# define GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY 4
#endif
-/*! \brief Macro defining default for the prune kernel's j4 processing concurrency.
+
+/*! \brief Prune kernel's j4 processing concurrency.
*
- * The GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY macro allows compile-time override.
+ * The \c GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY macro allows compile-time override.
*/
static constexpr int c_syclPruneKernelJ4Concurrency = GMX_NBNXN_PRUNE_KERNEL_J4_CONCURRENCY;
//! \brief Convert 3D range to 1D
static inline cl::sycl::range<1> flattenRange(cl::sycl::range<3> range3d)
{
- return cl::sycl::range<1>(range3d.size());
+ return { range3d.size() };
}
//! \brief Convert 3D nd_range to 1D
static inline cl::sycl::nd_range<1> flattenNDRange(cl::sycl::nd_range<3> nd_range3d)
{
- return cl::sycl::nd_range<1>(flattenRange(nd_range3d.get_global_range()),
- flattenRange(nd_range3d.get_local_range()));
+ return { flattenRange(nd_range3d.get_global_range()), flattenRange(nd_range3d.get_local_range()) };
}
//! \brief Convert flattened 1D index to 3D
const unsigned id = id1d[0];
const unsigned z = id / rangeXY;
const unsigned xy = id % rangeXY;
- return cl::sycl::id<3>(xy % rangeX, xy / rangeX, z);
+ return { xy % rangeX, xy / rangeX, z };
}
} // namespace Nbnxm
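As a sanity check on the index arithmetic, a SYCL-free sketch of the same flatten/unflatten round trip (names here are illustrative):

#include <array>
#include <cassert>
#include <cstddef>

// x varies fastest, then y, then z -- matching the unflattening above.
static std::size_t flattenId(std::array<std::size_t, 3> id3d, std::size_t rangeX, std::size_t rangeY)
{
    return (id3d[2] * rangeY + id3d[1]) * rangeX + id3d[0];
}

static std::array<std::size_t, 3> unflattenId(std::size_t id1d, std::size_t rangeX, std::size_t rangeY)
{
    const std::size_t rangeXY = rangeX * rangeY;
    const std::size_t z       = id1d / rangeXY;
    const std::size_t xy      = id1d % rangeXY;
    return { xy % rangeX, xy / rangeX, z };
}

int main()
{
    const std::size_t rangeX = 4, rangeY = 3, rangeZ = 2;
    for (std::size_t z = 0; z < rangeZ; z++)
    {
        for (std::size_t y = 0; y < rangeY; y++)
        {
            for (std::size_t x = 0; x < rangeX; x++)
            {
                const std::array<std::size_t, 3> id3d = { x, y, z };
                assert(unflattenId(flattenId(id3d, rangeX, rangeY), rangeX, rangeY) == id3d);
            }
        }
    }
    return 0;
}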
namespace gmx
{
-template<class Function>
-auto dispatchTemplatedFunction(Function&& f)
-{
- return std::forward<Function>(f)();
-}
-
/*! \internal \brief
* Helper function to select appropriate template based on runtime values.
*
p0, p1, p2);
}
* \endcode
- */
+ *
+ * \tparam Function Type of \p f.
+ * \param f Function to call.
+ * \return The result of calling \c f().
+ */
+template<class Function>
+auto dispatchTemplatedFunction(Function&& f)
+{
+ return std::forward<Function>(f)();
+}
+
+// Recursive templates confuse Doxygen
+//! \cond
template<class Function, class Enum, class... Enums>
auto dispatchTemplatedFunction(Function&& f, Enum e, Enums... es)
{
},
es...);
}
+//! \endcond
} // namespace gmx
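A usage sketch of the dispatcher; the enums and runKernel below are hypothetical, and the sketch assumes only the documented contract that each enum class ends in Count and that the callable receives the values as std::integral_constant:

#include <cstdio>

enum class ElecType : int
{
    Cutoff,
    ReactionField,
    Count
};

enum class VdwType : int
{
    Cutoff,
    Ewald,
    Count
};

template<ElecType elecType, VdwType vdwType>
static void runKernel()
{
    std::printf("elec=%d vdw=%d\n", static_cast<int>(elecType), static_cast<int>(vdwType));
}

// Turns two runtime enum values into template arguments via the dispatcher above.
static void dispatch(ElecType elec, VdwType vdw)
{
    gmx::dispatchTemplatedFunction(
            [](auto elec_, auto vdw_) { runKernel<decltype(elec_)::value, decltype(vdw_)::value>(); },
            elec,
            vdw);
}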