Template X buffer ops on setFillerCoords
authorSzilárd Páll <pall.szilard@gmail.com>
Tue, 17 Dec 2019 02:06:55 +0000 (03:06 +0100)
committerArtem Zhmurov <zhmurov@gmail.com>
Sun, 19 Jan 2020 11:58:53 +0000 (12:58 +0100)
Also fix a typo and remove an outdated TODO.

Change-Id: I4d35a889fa71b0096e825b95a02860ab6420c4ad

src/gromacs/nbnxm/cuda/nbnxm_buffer_ops_kernels.cuh
src/gromacs/nbnxm/cuda/nbnxm_cuda.cu

index efb713c093b8f1c414b70867594fe11d20e90401..db3fb4a939ddcc5ae1704241fe9028bc61ef06d6 100644 (file)
 /*! \brief CUDA kernel for transforming position coordinates from rvec to nbnxm layout.
  *
  * TODO:
- *  - improve/simplify/document use of cxy_na and na_round
  *  - rename kernel so naming matches with the other NBNXM kernels;
  *  - enable separate compilation unit
 
  * \param[in]     numColumns          Extent of cell-level parallelism.
  * \param[out]    gm_xq               Coordinates buffer in nbnxm layout.
- * \param[in]     setFillerCoords     Whether to set the coordinates of the filler particles.
+ * \tparam        setFillerCoords     Whether to set the coordinates of the filler particles.
  * \param[in]     gm_x                Coordinates buffer.
  * \param[in]     gm_atomIndex        Atom index mapping.
  * \param[in]     gm_numAtoms         Array of number of atoms.
  * \param[in]     gm_cellIndex        Array of cell indices.
- * \param[in]     cellOffset          Airst cell.
+ * \param[in]     cellOffset          First cell.
  * \param[in]     numAtomsPerCell     Number of atoms per cell.
  */
+template<bool setFillerCoords>
 static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
                                                     float4* __restrict__ gm_xq,
-                                                    bool setFillerCoords,
                                                     const float3* __restrict__ gm_x,
                                                     const int* __restrict__ gm_atomIndex,
                                                     const int* __restrict__ gm_numAtoms,
index 57504d06bb15e130d71eaf81a953d5b87af2996c..f5135d7bfbf6a1a1de7639a54c05ef2eb6609f88 100644 (file)
@@ -859,14 +859,15 @@ void nbnxn_gpu_x_to_nbat_x(const Nbnxm::Grid&        grid,
         config.sharedMemorySize = 0;
         config.stream           = stream;
 
-        auto       kernelFn      = nbnxn_gpu_x_to_nbat_x_kernel;
+        auto kernelFn = setFillerCoords ? nbnxn_gpu_x_to_nbat_x_kernel<true>
+                                        : nbnxn_gpu_x_to_nbat_x_kernel<false>;
         float4*    d_xq          = adat->xq;
         const int* d_atomIndices = nb->atomIndices;
         const int* d_cxy_na      = &nb->cxy_na[numColumnsMax * gridId];
         const int* d_cxy_ind     = &nb->cxy_ind[numColumnsMax * gridId];
-        const auto kernelArgs    = prepareGpuKernelArguments(
-                kernelFn, config, &numColumns, &d_xq, &setFillerCoords, &d_x, &d_atomIndices,
-                &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell);
+        const auto kernelArgs =
+                prepareGpuKernelArguments(kernelFn, config, &numColumns, &d_xq, &d_x, &d_atomIndices,
+                                          &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell);
         launchGpuKernel(kernelFn, config, nullptr, "XbufferOps", kernelArgs);
     }