From: Szilárd Páll Date: Mon, 1 Nov 2021 13:23:44 +0000 (+0000) Subject: Fix hipSYCL build with CUDA target X-Git-Url: http://biod.pnpi.spb.ru/gitweb/?p=alexxy%2Fgromacs.git;a=commitdiff_plain;h=a367a3396e4d592e3c1baae0404a81116e9a2e79 Fix hipSYCL build with CUDA target --- diff --git a/src/config.h.cmakein b/src/config.h.cmakein index 48d36ac9d5..61755b09cf 100644 --- a/src/config.h.cmakein +++ b/src/config.h.cmakein @@ -230,6 +230,12 @@ /* Define if hipSYCL is used for SYCL support (otherwise DPCPP is assumed) */ #cmakedefine01 GMX_SYCL_HIPSYCL +// Define if hipSYCL has HIP target(s) +#cmakedefine01 GMX_HIPSYCL_HAVE_HIP_TARGET + +// Define if hipSYCL has CUDA target(s) +#cmakedefine01 GMX_HIPSYCL_HAVE_CUDA_TARGET + /* Define if Intel's DPCPP is used (default unless hipSYCL is chosen) */ #define GMX_SYCL_DPCPP (!GMX_SYCL_HIPSYCL && GMX_GPU_SYCL) diff --git a/src/gromacs/CMakeLists.txt b/src/gromacs/CMakeLists.txt index e3876a38e6..bf530db081 100644 --- a/src/gromacs/CMakeLists.txt +++ b/src/gromacs/CMakeLists.txt @@ -190,7 +190,7 @@ if (TARGET Heffte::Heffte) target_link_libraries(libgromacs PRIVATE Heffte::Heffte) endif() -if (GMX_SYCL_HIPSYCL) +if (GMX_SYCL_HIPSYCL AND GMX_HIPSYCL_HAVE_HIP_TARGET) target_link_libraries(libgromacs PUBLIC roc::rocfft) endif() diff --git a/src/gromacs/fft/CMakeLists.txt b/src/gromacs/fft/CMakeLists.txt index a1c2e6899a..16a5a4b705 100644 --- a/src/gromacs/fft/CMakeLists.txt +++ b/src/gromacs/fft/CMakeLists.txt @@ -82,7 +82,7 @@ elseif (GMX_GPU_SYCL) gpu_3dfft_sycl_mkl.cpp ) endif() - if (GMX_SYCL_HIPSYCL) + if (GMX_SYCL_HIPSYCL AND GMX_HIPSYCL_HAVE_HIP_TARGET) gmx_add_libgromacs_sources( gpu_3dfft_sycl_rocfft.cpp ) diff --git a/src/gromacs/fft/gpu_3dfft.cpp b/src/gromacs/fft/gpu_3dfft.cpp index 7dd21185e7..3eeca1a784 100644 --- a/src/gromacs/fft/gpu_3dfft.cpp +++ b/src/gromacs/fft/gpu_3dfft.cpp @@ -55,7 +55,7 @@ # if GMX_SYCL_DPCPP && GMX_FFT_MKL # include "gpu_3dfft_sycl_mkl.h" # endif -# if GMX_SYCL_HIPSYCL +# if GMX_SYCL_HIPSYCL && GMX_HIPSYCL_HAVE_HIP_TARGET # include "gpu_3dfft_sycl_rocfft.h" # endif #endif @@ -155,7 +155,7 @@ Gpu3dFft::Gpu3dFft(FftBackend backend, complexGrid); break; # endif -# if GMX_SYCL_HIPSYCL +# if GMX_SYCL_HIPSYCL && GMX_HIPSYCL_HAVE_HIP_TARGET case FftBackend::SyclRocfft: impl_ = std::make_unique(allocateGrids, comm, diff --git a/src/gromacs/fft/tests/fft.cpp b/src/gromacs/fft/tests/fft.cpp index e06d89e0d2..b2dc98e4b5 100644 --- a/src/gromacs/fft/tests/fft.cpp +++ b/src/gromacs/fft/tests/fft.cpp @@ -72,6 +72,8 @@ namespace gmx { namespace test { +namespace +{ /*! \brief Input data for FFT tests. * @@ -364,8 +366,44 @@ TEST_F(FFTTest3D, Real5_6_9) checkRealGrid(realGridSize, realGridSizePadded, in_, outputRealGridValues); } -#if GMX_GPU_CUDA || GMX_GPU_OPENCL \ - || (GMX_GPU_SYCL && (GMX_SYCL_HIPSYCL || (GMX_SYCL_DPCPP && GMX_FFT_MKL))) +#if GMX_GPU + +/*! \brief Whether the FFT is in- or out-of-place + * + * DPCPP uses oneMKL, which seems to have troubles with out-of-place + * transforms. */ +static constexpr bool sc_performOutOfPlaceFFT = !(GMX_SYCL_DPCPP && GMX_FFT_MKL); + +/*! \brief Return the output grid depending on whether in- or out-of + * place FFT is used + * + * Some versions of clang complain of unused code if we would just + * branch on the value of sc_performOutOfPlaceFFT at run time, because + * in any single configuration there would indeed be unused code. So + * the two template specializations are needed so that the compiler + * only compiles the template that is used. */ +template +DeviceBuffer* actualOutputGrid(DeviceBuffer* realGrid, DeviceBuffer* complexGrid); + +# if GMX_SYCL_DPCPP && GMX_FFT_MKL + +template<> +DeviceBuffer* actualOutputGrid(DeviceBuffer* realGrid, + DeviceBuffer* /* complexGrid */) +{ + return realGrid; +}; + +# else + +template<> +DeviceBuffer* actualOutputGrid(DeviceBuffer* /* realGrid */, DeviceBuffer* complexGrid) +{ + return complexGrid; +} + +# endif + TEST_F(FFTTest3D, GpuReal5_6_9) { // Ensure library resources are managed appropriately @@ -398,28 +436,46 @@ TEST_F(FFTTest3D, GpuReal5_6_9) // Use std::copy to convert from double to real easily std::copy(inputdata, inputdata + sizeInReals, in_.begin()); - // DPCPP uses oneMKL, which seems to have troubles with out-of-place transforms - const bool performOutOfPlaceFFT = !GMX_SYCL_DPCPP; - - SCOPED_TRACE("Allocating the device buffers"); - DeviceBuffer realGrid, complexGrid; - allocateDeviceBuffer(&realGrid, in_.size(), deviceContext); - if (performOutOfPlaceFFT) - { - allocateDeviceBuffer(&complexGrid, complexGridValues.size(), deviceContext); - } - # if GMX_GPU_CUDA const FftBackend backend = FftBackend::Cufft; # elif GMX_GPU_OPENCL const FftBackend backend = FftBackend::Ocl; # elif GMX_GPU_SYCL # if GMX_SYCL_HIPSYCL +# if GMX_HIPSYCL_HAVE_HIP_TARGET const FftBackend backend = FftBackend::SyclRocfft; -# elif GMX_SYCL_DPCPP && GMX_FFT_MKL +# else + // Use stub backend so compilation succeeds + const FftBackend backend = FftBackend::Sycl; + // Don't complain about unused reference data + checker.disableUnusedEntriesCheck(); + // Skip the rest of the test + GTEST_SKIP() << "Only rocFFT backend is supported with hipSYCL"; +# endif +# elif GMX_SYCL_DPCPP +# if GMX_FFT_MKL const FftBackend backend = FftBackend::SyclMkl; +# else + // Use stub backend so compilation succeeds + const FftBackend backend = FftBackend::Sycl; + // Don't complain about unused reference data + checker.disableUnusedEntriesCheck(); + // Skip the rest of the test + GTEST_SKIP() << "Only MKL backend is supported with DPC++"; +# endif +# else +# error "Unsupported SYCL implementation" # endif # endif + + SCOPED_TRACE("Allocating the device buffers"); + DeviceBuffer realGrid, complexGrid; + allocateDeviceBuffer(&realGrid, in_.size(), deviceContext); + if (sc_performOutOfPlaceFFT) + { + allocateDeviceBuffer(&complexGrid, complexGridValues.size(), deviceContext); + } + MPI_Comm comm = MPI_COMM_NULL; const bool allocateGrid = false; std::array gridSizesInXForEachRank = { 0 }; @@ -431,14 +487,14 @@ TEST_F(FFTTest3D, GpuReal5_6_9) gridSizesInXForEachRank, gridSizesInYForEachRank, nz, - performOutOfPlaceFFT, + sc_performOutOfPlaceFFT, deviceContext, deviceStream, realGridSize, realGridSizePadded, complexGridSizePadded, &realGrid, - performOutOfPlaceFFT ? &complexGrid : &realGrid); + actualOutputGrid(&realGrid, &complexGrid)); // Transfer the real grid input data for the FFT copyToDeviceBuffer( @@ -451,7 +507,7 @@ TEST_F(FFTTest3D, GpuReal5_6_9) // Check the complex grid (NB this data has not been normalized) copyFromDeviceBuffer(complexGridValues.data(), - performOutOfPlaceFFT ? &complexGrid : &realGrid, + actualOutputGrid(&realGrid, &complexGrid), 0, complexGridValues.size(), deviceStream, @@ -461,7 +517,7 @@ TEST_F(FFTTest3D, GpuReal5_6_9) complexGridValues.begin(), complexGridValues.end(), "ComplexGridAfterRealToComplex"); std::vector outputRealGridValues(in_.size()); - if (performOutOfPlaceFFT) + if (sc_performOutOfPlaceFFT) { // Clear the real grid input data for the FFT so we can // compute the back transform into it and observe that it did @@ -492,7 +548,7 @@ TEST_F(FFTTest3D, GpuReal5_6_9) SCOPED_TRACE("Cleaning up"); freeDeviceBuffer(&realGrid); - if (performOutOfPlaceFFT) + if (sc_performOutOfPlaceFFT) { freeDeviceBuffer(&complexGrid); } @@ -501,5 +557,6 @@ TEST_F(FFTTest3D, GpuReal5_6_9) #endif +} // namespace } // namespace test } // namespace gmx