From: Roland Schulz Date: Mon, 17 May 2021 22:14:32 +0000 (-0700) Subject: Fix build with clang-cuda X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?a=commitdiff_plain;h=fe2e63b7debbd2b2fc315fee087c0de161cfc9c6;p=alexxy%2Fgromacs.git Fix build with clang-cuda --- diff --git a/cmake/gmxManageClangCudaConfig.cmake b/cmake/gmxManageClangCudaConfig.cmake index c68dce6853..5d35ac7f55 100644 --- a/cmake/gmxManageClangCudaConfig.cmake +++ b/cmake/gmxManageClangCudaConfig.cmake @@ -1,7 +1,7 @@ # # This file is part of the GROMACS molecular simulation package. # -# Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by +# Copyright (c) 2017,2018,2019,2020,2021, by the GROMACS development team, led by # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, # and including many others, as listed in the AUTHORS file in the # top-level source directory and at http://www.gromacs.org. @@ -57,8 +57,10 @@ if (GMX_CUDA_TARGET_SM) foreach(_target ${_target_sm_list}) list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_${_target}") endforeach() -else() - list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_30") + else() + if(CUDA_VERSION VERSION_LESS "11.0") + list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_30") + endif() list(APPEND _CUDA_CLANG_GENCODE_FLAGS "--cuda-gpu-arch=sm_35") # clang 6.0 + CUDA 9.0 seems to have issues generating code for sm_37 if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0.999) diff --git a/src/gromacs/ewald/pme_gpu_program_impl.cu b/src/gromacs/ewald/pme_gpu_program_impl.cu index 1358793807..7f47537bbb 100644 --- a/src/gromacs/ewald/pme_gpu_program_impl.cu +++ b/src/gromacs/ewald/pme_gpu_program_impl.cu @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by + * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -60,74 +60,74 @@ constexpr int c_stateB = 1; //! PME CUDA kernels forward declarations. Kernels are documented in their respective files. template -void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams kernelParams); +__global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams kernelParams); // Add extern declarations to inform that there will be a definition // provided in another translation unit. // clang-format off -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); -extern template void +extern template __global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams); template /* It is significantly slower to pass gridIndex as a kernel parameter */ -void pme_solve_kernel(const PmeGpuCudaKernelParams kernelParams); +__global__ void pme_solve_kernel(const PmeGpuCudaKernelParams kernelParams); // Add extern declarations to inform that there will be a definition // provided in another translation unit. // clang-format off -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); -extern template void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_solve_kernel(const PmeGpuCudaKernelParams); // clang-format on template -void pme_gather_kernel(const PmeGpuCudaKernelParams kernelParams); +__global__ void pme_gather_kernel(const PmeGpuCudaKernelParams kernelParams); // Add extern declarations to inform that there will be a definition // provided in another translation unit. // clang-format off -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel(const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); -extern template void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel(const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); +extern template __global__ void pme_gather_kernel (const PmeGpuCudaKernelParams); // clang-format on PmeGpuProgramImpl::PmeGpuProgramImpl(const DeviceContext& deviceContext) :