/*! \brief CUDA kernel for transforming position coordinates from rvec to nbnxm layout.
*
* TODO:
- * - improve/simplify/document use of cxy_na and na_round
* - rename kernel so naming matches with the other NBNXM kernels;
* - enable separate compilation unit
* \param[in] numColumns Extent of cell-level parallelism.
* \param[out] gm_xq Coordinates buffer in nbnxm layout.
- * \param[in] setFillerCoords Whether to set the coordinates of the filler particles.
+ * \tparam setFillerCoords Whether to set the coordinates of the filler particles.
* \param[in] gm_x Coordinates buffer.
* \param[in] gm_atomIndex Atom index mapping.
* \param[in] gm_numAtoms Array with the number of atoms in each column.
* \param[in] gm_cellIndex Array of cell indices.
- * \param[in] cellOffset Airst cell.
+ * \param[in] cellOffset First cell.
* \param[in] numAtomsPerCell Number of atoms per cell.
*/
+template<bool setFillerCoords>
static __global__ void nbnxn_gpu_x_to_nbat_x_kernel(int numColumns,
float4* __restrict__ gm_xq,
- bool setFillerCoords,
const float3* __restrict__ gm_x,
const int* __restrict__ gm_atomIndex,
const int* __restrict__ gm_numAtoms,
config.sharedMemorySize = 0;
config.stream = stream;
- auto kernelFn = nbnxn_gpu_x_to_nbat_x_kernel;
+ auto kernelFn = setFillerCoords ? nbnxn_gpu_x_to_nbat_x_kernel<true>
+ : nbnxn_gpu_x_to_nbat_x_kernel<false>;
float4* d_xq = adat->xq;
const int* d_atomIndices = nb->atomIndices;
const int* d_cxy_na = &nb->cxy_na[numColumnsMax * gridId];
const int* d_cxy_ind = &nb->cxy_ind[numColumnsMax * gridId];
- const auto kernelArgs = prepareGpuKernelArguments(
- kernelFn, config, &numColumns, &d_xq, &setFillerCoords, &d_x, &d_atomIndices,
- &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell);
+ const auto kernelArgs =
+ prepareGpuKernelArguments(kernelFn, config, &numColumns, &d_xq, &d_x, &d_atomIndices,
+ &d_cxy_na, &d_cxy_ind, &cellOffset, &numAtomsPerCell);
launchGpuKernel(kernelFn, config, nullptr, "XbufferOps", kernelArgs);
}