From 00bbe9d48d731f828b19f85b5c706f4114325a88 Mon Sep 17 00:00:00 2001 From: Andrey Alekseenko Date: Tue, 14 Sep 2021 20:50:47 +0300 Subject: [PATCH] Minor fixes to comments and Doxygen - Fix issue number in cmake/gmxManageNvccConfig.cmake. - Fix use of \tparam in CUDA code. --- cmake/gmxManageNvccConfig.cmake | 2 +- src/gromacs/ewald/pme_gather.cu | 26 +++++++++---------- .../ewald/pme_gpu_calculate_splines.cuh | 18 ++++++------- src/gromacs/ewald/pme_solve.cu | 6 ++--- src/gromacs/ewald/pme_spread.cu | 26 +++++++++---------- src/gromacs/gpu_utils/cuda_kernel_utils.cuh | 4 +-- 6 files changed, 41 insertions(+), 41 deletions(-) diff --git a/cmake/gmxManageNvccConfig.cmake b/cmake/gmxManageNvccConfig.cmake index 5a83a80f4a..e72fdaca8c 100644 --- a/cmake/gmxManageNvccConfig.cmake +++ b/cmake/gmxManageNvccConfig.cmake @@ -51,7 +51,7 @@ # glibc source shows that _FORCE_INLINES is only used in this string.h # feature and performance of memcpy variants is unimportant for CUDA # code in GROMACS. So this workaround is good enough to keep problems -# away from users installing GROMACS. See Issue #1942. +# away from users installing GROMACS. See Issue #1982. function(work_around_glibc_2_23) try_compile(IS_GLIBC_2_23_OR_HIGHER ${CMAKE_BINARY_DIR} ${CMAKE_SOURCE_DIR}/cmake/TestGlibcVersion.cpp) if(IS_GLIBC_2_23_OR_HIGHER) diff --git a/src/gromacs/ewald/pme_gather.cu b/src/gromacs/ewald/pme_gather.cu index ec1d9ecbcf..28de3817e5 100644 --- a/src/gromacs/ewald/pme_gather.cu +++ b/src/gromacs/ewald/pme_gather.cu @@ -68,10 +68,10 @@ __device__ __forceinline__ float read_grid_size(const float* realGridSizeFP, con /*! \brief Reduce the partial force contributions. * - * \tparam[in] order The PME order (must be 4). - * \tparam[in] atomDataSize The number of partial force contributions for each atom (currently + * \tparam order The PME order (must be 4). + * \tparam atomDataSize The number of partial force contributions for each atom (currently * order^2 == 16) - * \tparam[in] blockSize The CUDA block size + * \tparam blockSize The CUDA block size * * \param[out] sm_forces Shared memory array with the output forces (number of elements * is number of atoms per block) @@ -220,10 +220,10 @@ __device__ __forceinline__ void reduce_atom_forces(float3* __restrict__ sm_force /*! \brief Calculate the sum of the force partial components (in X, Y and Z) * - * \tparam[in] order The PME order (must be 4). - * \tparam[in] atomsPerWarp The number of atoms per GPU warp. - * \tparam[in] wrapX Tells if the grid is wrapped in the X dimension. - * \tparam[in] wrapY Tells if the grid is wrapped in the Y dimension. + * \tparam order The PME order (must be 4). + * \tparam atomsPerWarp The number of atoms per GPU warp. + * \tparam wrapX Tells if the grid is wrapped in the X dimension. + * \tparam wrapY Tells if the grid is wrapped in the Y dimension. * \param[out] fx The force partial component in the X dimension. * \param[out] fy The force partial component in the Y dimension. * \param[out] fz The force partial component in the Z dimension. @@ -335,12 +335,12 @@ __device__ __forceinline__ void calculateAndStoreGridForces(float3* __restrict__ * A CUDA kernel which gathers the atom forces from the grid. * The grid is assumed to be wrapped in dimension Z. * - * \tparam[in] order The PME order (must be 4 currently). - * \tparam[in] wrapX Tells if the grid is wrapped in the X dimension. - * \tparam[in] wrapY Tells if the grid is wrapped in the Y dimension. - * \tparam[in] numGrids The number of grids to use in the kernel. Can be 1 or 2. 
- * \tparam[in] readGlobal Tells if we should read spline values from global memory - * \tparam[in] threadsPerAtom How many threads work on each atom + * \tparam order The PME order (must be 4 currently). + * \tparam wrapX Tells if the grid is wrapped in the X dimension. + * \tparam wrapY Tells if the grid is wrapped in the Y dimension. + * \tparam numGrids The number of grids to use in the kernel. Can be 1 or 2. + * \tparam readGlobal Tells if we should read spline values from global memory + * \tparam threadsPerAtom How many threads work on each atom * * \param[in] kernelParams All the PME GPU data. */ diff --git a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh index 1ff60f7ed1..d8eefd43f0 100644 --- a/src/gromacs/ewald/pme_gpu_calculate_splines.cuh +++ b/src/gromacs/ewald/pme_gpu_calculate_splines.cuh @@ -140,10 +140,10 @@ __device__ inline void assertIsFinite(T gmx_unused arg) /*! \brief * General purpose function for loading atom-related data from global to shared memory. * - * \tparam[in] T Data type (float/int/...) - * \tparam[in] atomsPerBlock Number of atoms processed by a block - should be accounted for in + * \tparam T Data type (float/int/...) + * \tparam atomsPerBlock Number of atoms processed by a block - should be accounted for in * the size of the shared memory array. - * \tparam[in] dataCountPerAtom Number of data elements per single atom (e.g. DIM for an rvec + * \tparam dataCountPerAtom Number of data elements per single atom (e.g. DIM for an rvec * coordinates array). * \param[out] sm_destination Shared memory array for output. * \param[in] gm_source Global memory array for input. @@ -169,16 +169,16 @@ __device__ __forceinline__ void pme_gpu_stage_atom_data(T* __restrict__ sm_desti * This corresponds to the CPU functions calc_interpolation_idx() and make_bsplines(). * First stage of the whole kernel. * - * \tparam[in] order PME interpolation order. - * \tparam[in] atomsPerBlock Number of atoms processed by a block - should be accounted for + * \tparam order PME interpolation order. + * \tparam atomsPerBlock Number of atoms processed by a block - should be accounted for * in the sizes of the shared memory arrays. - * \tparam[in] atomsPerWarp Number of atoms processed by a warp - * \tparam[in] writeSmDtheta Bool controlling if the theta derivative should be written to + * \tparam atomsPerWarp Number of atoms processed by a warp + * \tparam writeSmDtheta Bool controlling if the theta derivative should be written to * shared memory. Enables calculation of dtheta if set. - * \tparam[in] writeGlobal A boolean which tells if the theta values and gridlines should + * \tparam writeGlobal A boolean which tells if the theta values and gridlines should * be written to global memory. Enables calculation of dtheta if * set. - * \tparam[in] numGrids The number of grids using the splines. + * \tparam numGrids The number of grids using the splines. * \param[in] kernelParams Input PME CUDA data in constant memory. * \param[in] atomIndexOffset Starting atom index for the execution block w.r.t. global memory. * \param[in] atomX Atom coordinate of atom processed by thread. diff --git a/src/gromacs/ewald/pme_solve.cu b/src/gromacs/ewald/pme_solve.cu index 83e21b1f11..42dbf9c0ec 100644 --- a/src/gromacs/ewald/pme_solve.cu +++ b/src/gromacs/ewald/pme_solve.cu @@ -52,9 +52,9 @@ /*! \brief * PME complex grid solver kernel function. * - * \tparam[in] gridOrdering Specifies the dimension ordering of the complex grid. 
- * \tparam[in] computeEnergyAndVirial Tells if the reciprocal energy and virial should be computed. - * \tparam[in] gridIndex The index of the grid to use in the kernel. + * \tparam gridOrdering Specifies the dimension ordering of the complex grid. + * \tparam computeEnergyAndVirial Tells if the reciprocal energy and virial should be computed. + * \tparam gridIndex The index of the grid to use in the kernel. * \param[in] kernelParams Input PME CUDA data in constant memory. */ template diff --git a/src/gromacs/ewald/pme_spread.cu b/src/gromacs/ewald/pme_spread.cu index 4765c9c4c8..d0856602a9 100644 --- a/src/gromacs/ewald/pme_spread.cu +++ b/src/gromacs/ewald/pme_spread.cu @@ -69,11 +69,11 @@ * This corresponds to the CPU function spread_coefficients_bsplines_thread(). * Optional second stage of the spline_and_spread_kernel. * - * \tparam[in] order PME interpolation order. - * \tparam[in] wrapX Whether the grid overlap in dimension X should be wrapped. - * \tparam[in] wrapY Whether the grid overlap in dimension Y should be wrapped. - * \tparam[in] gridIndex The index of the grid to use in the kernel. - * \tparam[in] threadsPerAtom How many threads work on each atom + * \tparam order PME interpolation order. + * \tparam wrapX Whether the grid overlap in dimension X should be wrapped. + * \tparam wrapY Whether the grid overlap in dimension Y should be wrapped. + * \tparam gridIndex The index of the grid to use in the kernel. + * \tparam threadsPerAtom How many threads work on each atom * * \param[in] kernelParams Input PME CUDA data in constant memory. * \param[in] atomCharge Atom charge/coefficient of atom processed by thread. @@ -169,15 +169,15 @@ __device__ __forceinline__ void spread_charges(const PmeGpuCudaKernelParams kern * writeGlobal should be used removing the need to recalculate the theta values in the gather kernel. * Similarly for useOrderThreads large systems order threads per atom gives higher performance than order*order threads * - * \tparam[in] order PME interpolation order. - * \tparam[in] computeSplines A boolean which tells if the spline parameter and + * \tparam order PME interpolation order. + * \tparam computeSplines A boolean which tells if the spline parameter and * gridline indices' computation should be performed. - * \tparam[in] spreadCharges A boolean which tells if the charge spreading should be performed. - * \tparam[in] wrapX A boolean which tells if the grid overlap in dimension X should be wrapped. - * \tparam[in] wrapY A boolean which tells if the grid overlap in dimension Y should be wrapped. - * \tparam[in] numGrids The number of grids to use in the kernel. Can be 1 or 2. - * \tparam[in] writeGlobal A boolean which tells if the theta values and gridlines should be written to global memory. - * \tparam[in] threadsPerAtom How many threads work on each atom + * \tparam spreadCharges A boolean which tells if the charge spreading should be performed. + * \tparam wrapX A boolean which tells if the grid overlap in dimension X should be wrapped. + * \tparam wrapY A boolean which tells if the grid overlap in dimension Y should be wrapped. + * \tparam numGrids The number of grids to use in the kernel. Can be 1 or 2. + * \tparam writeGlobal A boolean which tells if the theta values and gridlines should be written to global memory. + * \tparam threadsPerAtom How many threads work on each atom * \param[in] kernelParams Input PME CUDA data in constant memory. 
*/ template diff --git a/src/gromacs/gpu_utils/cuda_kernel_utils.cuh b/src/gromacs/gpu_utils/cuda_kernel_utils.cuh index 0333d84ae8..b8497c68f1 100644 --- a/src/gromacs/gpu_utils/cuda_kernel_utils.cuh +++ b/src/gromacs/gpu_utils/cuda_kernel_utils.cuh @@ -58,7 +58,7 @@ __device__ __forceinline__ T LDG(const T* ptr) /*! \brief Fetch the value by \p index from the texture object. * - * \tparam[in] T Raw data type + * \tparam T Raw data type * \param[in] texObj Table texture object * \param[in] index Non-negative element index * \returns The value from the table at \p index @@ -77,7 +77,7 @@ static __forceinline__ __device__ T fetchFromTexture(const cudaTextureObject_t t * Depending on what is supported, it fetches parameters either * using direct load or texture objects. * - * \tparam[in] T Raw data type + * \tparam T Raw data type * \param[in] d_ptr Device pointer to the raw table memory * \param[in] texObj Table texture object * \param[in] index Non-negative element index -- 2.22.0
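For reference beyond the patch itself: the Doxygen convention being enforced here is that direction attributes such as [in]/[out] apply only to \param; \tparam names a template parameter and accepts no direction specifier. A minimal illustrative sketch of the corrected style follows (the fetchElement function is hypothetical and not part of GROMACS):

/*! \brief Fetch one element from a device table (illustrative only).
 *
 * \tparam     T         Element type. Note: no [in] attribute, since \tparam
 *                       does not take direction specifiers.
 * \param[in]  gm_table  Device pointer to the table memory.
 * \param[in]  index     Non-negative element index.
 * \returns    The value stored at \p index.
 */
template<typename T>
static __forceinline__ __device__ T fetchElement(const T* gm_table, int index)
{
    return gm_table[index];
}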