Minor fixes to comments and Doxygen

author Andrey Alekseenko <al42and@gmail.com>

Tue, 14 Sep 2021 17:50:47 +0000 (20:50 +0300)

committer Andrey Alekseenko <al42and@gmail.com>

Thu, 16 Sep 2021 15:14:31 +0000 (15:14 +0000)
author Andrey Alekseenko <al42and@gmail.com>
Tue, 14 Sep 2021 17:50:47 +0000 (20:50 +0300)
committer Andrey Alekseenko <al42and@gmail.com>
Thu, 16 Sep 2021 15:14:31 +0000 (15:14 +0000)
diff --git a/cmake/gmxManageNvccConfig.cmake b/cmake/gmxManageNvccConfig.cmake

index 5a83a80f4a418a7b2b648e60d2e30755de4da5cd..e72fdaca8c44d30bd369a98b0d5d706647028bfb 100644 (file)
--- a/cmake/gmxManageNvccConfig.cmake
+++ b/cmake/gmxManageNvccConfig.cmake
@@ -51,7 +51,7 @@
  # glibc source shows that _FORCE_INLINES is only used in this string.h
  # feature and performance of memcpy variants is unimportant for CUDA
  # code in GROMACS. So this workaround is good enough to keep problems
-# away from users installing GROMACS. See Issue #1942.
+# away from users installing GROMACS. See Issue #1982.
  function(work_around_glibc_2_23)
      try_compile(IS_GLIBC_2_23_OR_HIGHER ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/TestGlibcVersion.cpp)
      if(IS_GLIBC_2_23_OR_HIGHER)
diff --git a/src/gromacs/ewald/pme_gather.cu b/src/gromacs/ewald/pme_gather.cu

index ec1d9ecbcfd74fa9c1aed1d38d72d6a90119f5df..28de3817e5fcd6a4fc004e27cfdd1d9f32199e26 100644 (file)
--- a/src/gromacs/ewald/pme_gather.cu
+++ b/src/gromacs/ewald/pme_gather.cu
@@ -68,10 +68,10 @@ __device__ __forceinline__ float read_grid_size(const float* realGridSizeFP, con
  
  /*! \brief Reduce the partial force contributions.
   *
- * \tparam[in] order              The PME order (must be 4).
- * \tparam[in] atomDataSize       The number of partial force contributions for each atom (currently
+ * \tparam     order              The PME order (must be 4).
+ * \tparam     atomDataSize       The number of partial force contributions for each atom (currently
   *                                order^2 == 16)
- * \tparam[in] blockSize          The CUDA block size
+ * \tparam     blockSize          The CUDA block size
   *
   * \param[out] sm_forces          Shared memory array with the output forces (number of elements
   *                                is number of atoms per block)
@@ -220,10 +220,10 @@ __device__ __forceinline__ void reduce_atom_forces(float3* __restrict__ sm_force
  
  /*! \brief Calculate the sum of the force partial components (in X, Y and Z)
   *
- * \tparam[in] order              The PME order (must be 4).
- * \tparam[in] atomsPerWarp       The number of atoms per GPU warp.
- * \tparam[in] wrapX              Tells if the grid is wrapped in the X dimension.
- * \tparam[in] wrapY              Tells if the grid is wrapped in the Y dimension.
+ * \tparam     order              The PME order (must be 4).
+ * \tparam     atomsPerWarp       The number of atoms per GPU warp.
+ * \tparam     wrapX              Tells if the grid is wrapped in the X dimension.
+ * \tparam     wrapY              Tells if the grid is wrapped in the Y dimension.
   * \param[out] fx                 The force partial component in the X dimension.
   * \param[out] fy                 The force partial component in the Y dimension.
   * \param[out] fz                 The force partial component in the Z dimension.
@@ -335,12 +335,12 @@ __device__ __forceinline__ void calculateAndStoreGridForces(float3* __restrict__
   * A CUDA kernel which gathers the atom forces from the grid.
   * The grid is assumed to be wrapped in dimension Z.
   *
- * \tparam[in] order                The PME order (must be 4 currently).
- * \tparam[in] wrapX                Tells if the grid is wrapped in the X dimension.
- * \tparam[in] wrapY                Tells if the grid is wrapped in the Y dimension.
- * \tparam[in] numGrids             The number of grids to use in the kernel. Can be 1 or 2.
- * \tparam[in] readGlobal           Tells if we should read spline values from global memory
- * \tparam[in] threadsPerAtom       How many threads work on each atom
+ * \tparam     order                The PME order (must be 4 currently).
+ * \tparam     wrapX                Tells if the grid is wrapped in the X dimension.
+ * \tparam     wrapY                Tells if the grid is wrapped in the Y dimension.
+ * \tparam     numGrids             The number of grids to use in the kernel. Can be 1 or 2.
+ * \tparam     readGlobal           Tells if we should read spline values from global memory
+ * \tparam     threadsPerAtom       How many threads work on each atom
   *
   * \param[in]  kernelParams         All the PME GPU data.
   */
diff --git a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh

index 1ff60f7ed17497fceda74771c9269667f3775df6..d8eefd43f0f129abdd6724d31e03726643becb80 100644 (file)
--- a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh
+++ b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh
@@ -140,10 +140,10 @@ __device__ inline void assertIsFinite(T gmx_unused arg)
  /*! \brief
   * General purpose function for loading atom-related data from global to shared memory.
   *
- * \tparam[in] T                  Data type (float/int/...)
- * \tparam[in] atomsPerBlock      Number of atoms processed by a block - should be accounted for in
+ * \tparam     T                  Data type (float/int/...)
+ * \tparam     atomsPerBlock      Number of atoms processed by a block - should be accounted for in
   * the size of the shared memory array.
- * \tparam[in] dataCountPerAtom   Number of data elements per single atom (e.g. DIM for an rvec
+ * \tparam     dataCountPerAtom   Number of data elements per single atom (e.g. DIM for an rvec
   * coordinates array).
   * \param[out] sm_destination     Shared memory array for output.
   * \param[in]  gm_source          Global memory array for input.
@@ -169,16 +169,16 @@ __device__ __forceinline__ void pme_gpu_stage_atom_data(T* __restrict__ sm_desti
   * This corresponds to the CPU functions calc_interpolation_idx() and make_bsplines().
   * First stage of the whole kernel.
   *
- * \tparam[in] order                PME interpolation order.
- * \tparam[in] atomsPerBlock        Number of atoms processed by a block - should be accounted for
+ * \tparam     order                PME interpolation order.
+ * \tparam     atomsPerBlock        Number of atoms processed by a block - should be accounted for
   *                                  in the sizes of the shared memory arrays.
- * \tparam[in] atomsPerWarp         Number of atoms processed by a warp
- * \tparam[in] writeSmDtheta        Bool controlling if the theta derivative should be written to
+ * \tparam     atomsPerWarp         Number of atoms processed by a warp
+ * \tparam     writeSmDtheta        Bool controlling if the theta derivative should be written to
   *                                  shared memory. Enables calculation of dtheta if set.
- * \tparam[in] writeGlobal          A boolean which tells if the theta values and gridlines should
+ * \tparam     writeGlobal          A boolean which tells if the theta values and gridlines should
   *                                  be written to global memory. Enables calculation of dtheta if
   *                                  set.
- * \tparam[in] numGrids             The number of grids using the splines.
+ * \tparam     numGrids             The number of grids using the splines.
   * \param[in]  kernelParams         Input PME CUDA data in constant memory.
   * \param[in]  atomIndexOffset      Starting atom index for the execution block w.r.t. global memory.
   * \param[in]  atomX                Atom coordinate of atom processed by thread.
diff --git a/src/gromacs/ewald/pme_solve.cu b/src/gromacs/ewald/pme_solve.cu

index 83e21b1f118a2e09dd2b31267c92ce978bc85b75..42dbf9c0ecb733c5a246033f01cdc3b393229bf4 100644 (file)
--- a/src/gromacs/ewald/pme_solve.cu
+++ b/src/gromacs/ewald/pme_solve.cu
@@ -52,9 +52,9 @@
  /*! \brief
   * PME complex grid solver kernel function.
   *
- * \tparam[in] gridOrdering             Specifies the dimension ordering of the complex grid.
- * \tparam[in] computeEnergyAndVirial   Tells if the reciprocal energy and virial should be computed.
- * \tparam[in] gridIndex                The index of the grid to use in the kernel.
+ * \tparam     gridOrdering             Specifies the dimension ordering of the complex grid.
+ * \tparam     computeEnergyAndVirial   Tells if the reciprocal energy and virial should be computed.
+ * \tparam     gridIndex                The index of the grid to use in the kernel.
   * \param[in]  kernelParams             Input PME CUDA data in constant memory.
   */
  template<GridOrdering gridOrdering, bool computeEnergyAndVirial, const int gridIndex>
diff --git a/src/gromacs/ewald/pme_spread.cu b/src/gromacs/ewald/pme_spread.cu

index 4765c9c4c8cdc81af1e01349924f149a56c72132..d0856602a9e4574e0081a8231f8ec3073e6d2d12 100644 (file)
--- a/src/gromacs/ewald/pme_spread.cu
+++ b/src/gromacs/ewald/pme_spread.cu
@@ -69,11 +69,11 @@
   * This corresponds to the CPU function spread_coefficients_bsplines_thread().
   * Optional second stage of the spline_and_spread_kernel.
   *
- * \tparam[in] order                PME interpolation order.
- * \tparam[in] wrapX                Whether the grid overlap in dimension X should be wrapped.
- * \tparam[in] wrapY                Whether the grid overlap in dimension Y should be wrapped.
- * \tparam[in] gridIndex            The index of the grid to use in the kernel.
- * \tparam[in] threadsPerAtom       How many threads work on each atom
+ * \tparam     order                PME interpolation order.
+ * \tparam     wrapX                Whether the grid overlap in dimension X should be wrapped.
+ * \tparam     wrapY                Whether the grid overlap in dimension Y should be wrapped.
+ * \tparam     gridIndex            The index of the grid to use in the kernel.
+ * \tparam     threadsPerAtom       How many threads work on each atom
   *
   * \param[in]  kernelParams         Input PME CUDA data in constant memory.
   * \param[in]  atomCharge           Atom charge/coefficient of atom processed by thread.
@@ -169,15 +169,15 @@ __device__ __forceinline__ void spread_charges(const PmeGpuCudaKernelParams kern
   * writeGlobal should be used removing the need to recalculate the theta values in the gather kernel.
   * Similarly for useOrderThreads large systems order threads per atom gives higher performance than order*order threads
   *
- * \tparam[in] order                PME interpolation order.
- * \tparam[in] computeSplines       A boolean which tells if the spline parameter and
+ * \tparam     order                PME interpolation order.
+ * \tparam     computeSplines       A boolean which tells if the spline parameter and
   *                                  gridline indices' computation should be performed.
- * \tparam[in] spreadCharges        A boolean which tells if the charge spreading should be performed.
- * \tparam[in] wrapX                A boolean which tells if the grid overlap in dimension X should be wrapped.
- * \tparam[in] wrapY                A boolean which tells if the grid overlap in dimension Y should be wrapped.
- * \tparam[in] numGrids             The number of grids to use in the kernel. Can be 1 or 2.
- * \tparam[in] writeGlobal          A boolean which tells if the theta values and gridlines should be written to global memory.
- * \tparam[in] threadsPerAtom       How many threads work on each atom
+ * \tparam     spreadCharges        A boolean which tells if the charge spreading should be performed.
+ * \tparam     wrapX                A boolean which tells if the grid overlap in dimension X should be wrapped.
+ * \tparam     wrapY                A boolean which tells if the grid overlap in dimension Y should be wrapped.
+ * \tparam     numGrids             The number of grids to use in the kernel. Can be 1 or 2.
+ * \tparam     writeGlobal          A boolean which tells if the theta values and gridlines should be written to global memory.
+ * \tparam     threadsPerAtom       How many threads work on each atom
   * \param[in]  kernelParams         Input PME CUDA data in constant memory.
   */
  template<int order, bool computeSplines, bool spreadCharges, bool wrapX, bool wrapY, int numGrids, bool writeGlobal, ThreadsPerAtom threadsPerAtom>
diff --git a/src/gromacs/gpu_utils/cuda_kernel_utils.cuh b/src/gromacs/gpu_utils/cuda_kernel_utils.cuh

index 0333d84ae892f8f422cd0205958d648dff79cd88..b8497c68f1435559eb3821bceacda4dd1a4ee1c3 100644 (file)
--- a/src/gromacs/gpu_utils/cuda_kernel_utils.cuh
+++ b/src/gromacs/gpu_utils/cuda_kernel_utils.cuh
@@ -58,7 +58,7 @@ __device__ __forceinline__ T LDG(const T* ptr)
  
  /*! \brief Fetch the value by \p index from the texture object.
   *
- * \tparam[in] T        Raw data type
+ * \tparam T            Raw data type
   * \param[in] texObj    Table texture object
   * \param[in] index     Non-negative element index
   * \returns             The value from the table at \p index
@@ -77,7 +77,7 @@ static __forceinline__ __device__ T fetchFromTexture(const cudaTextureObject_t t
   *  Depending on what is supported, it fetches parameters either
   *  using direct load or texture objects.
   *
- * \tparam[in] T        Raw data type
+ * \tparam T            Raw data type
   * \param[in] d_ptr     Device pointer to the raw table memory
   * \param[in] texObj    Table texture object
   * \param[in] index     Non-negative element index
author	Andrey Alekseenko <al42and@gmail.com>
	Tue, 14 Sep 2021 17:50:47 +0000 (20:50 +0300)
committer	Andrey Alekseenko <al42and@gmail.com>
	Thu, 16 Sep 2021 15:14:31 +0000 (15:14 +0000)
cmake/gmxManageNvccConfig.cmake		patch | blob | history
src/gromacs/ewald/pme_gather.cu		patch | blob | history
src/gromacs/ewald/pme_gpu_calculate_splines.cuh		patch | blob | history
src/gromacs/ewald/pme_solve.cu		patch | blob | history
src/gromacs/ewald/pme_spread.cu		patch | blob | history
src/gromacs/gpu_utils/cuda_kernel_utils.cuh		patch | blob | history