From: Szilárd Páll Date: Tue, 17 Dec 2019 02:06:55 +0000 (+0100) Subject: Template X buffer ops on setFillerCoords X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=a3047228f0ea0deb2ef0ea47397bfd92f877678c;p=alexxy%2Fgromacs.git Template X buffer ops on setFillerCoords Also fix a typo and remove an outdated TODO. Change-Id: I4d35a889fa71b0096e825b95a02860ab6420c4ad --- diff --git a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh index efb713c093..db3fb4a939 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh +++ b/src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh @@ -48,23 +48,22 @@ /*! \brief CUDA kernel for transforming position coordinates from rvec to nbnxm layout. * * TODO: - * - improve/simplify/document use of cxy_na and na_round * - rename kernel so naming matches with the other NBNXM kernels; * - enable separate compilation unit * \param[in] numColumns Extent of cell-level parallelism. * \param[out] gm_xq Coordinates buffer in nbnxm layout. - * \param[in] setFillerCoords Whether to set the coordinates of the filler particles. + * \tparam setFillerCoords Whether to set the coordinates of the filler particles. * \param[in] gm_x Coordinates buffer. * \param[in] gm_atomIndex Atom index mapping. * \param[in] gm_numAtoms Array of number of atoms. * \param[in] gm_cellIndex Array of cell indices. - * \param[in] cellOffset Airst cell. + * \param[in] cellOffset First cell. * \param[in] numAtomsPerCell Number of atoms per cell. */ +template static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns, float4* __restrict__ gm_xq, - bool setFillerCoords, const float3* __restrict__ gm_x, const int* __restrict__ gm_atomIndex, const int* __restrict__ gm_numAtoms, diff --git a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu index 57504d06bb..f5135d7bfb 100644 --- a/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu +++ b/src/gromacs/nbnxm/cuda/nbnxm_cuda.cu @@ -859,14 +859,15 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid& grid, config.sharedMemorySize = 0; config.stream = stream; - auto kernelFn = nbnxn_gpu_x_to_nbat_x_kernel; + auto kernelFn = setFillerCoords ? nbnxn_gpu_x_to_nbat_x_kernel + : nbnxn_gpu_x_to_nbat_x_kernel; float4* d_xq = adat->xq; const int* d_atomIndices = nb->atomIndices; const int* d_cxy_na = &nb->cxy_na[numColumnsMax * gridId]; const int* d_cxy_ind = &nb->cxy_ind[numColumnsMax * gridId]; - const auto kernelArgs = prepareGpuKernelArguments( - kernelFn, config, &numColumns, &d_xq, &setFillerCoords, &d_x, &d_atomIndices, - &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell); + const auto kernelArgs = + prepareGpuKernelArguments(kernelFn, config, &numColumns, &d_xq, &d_x, &d_atomIndices, + &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell); launchGpuKernel(kernelFn, config, nullptr, "XbufferOps", kernelArgs); }