Fix hipSYCL build with CUDA target
[alexxy/gromacs.git] / src / gromacs / fft / gpu_3dfft.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
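37  *  \brief Implements the Gpu3dFft front end, which dispatches to the backend-specific GPU 3D FFT implementation (with a throwing stub constructor for CPU-only builds)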
38  *
39  *  \author Mark Abraham <mark.j.abraham@gmail.com>
40  *  \author Gaurav Garg <gaugarg@nvidia.com>
41  *  \ingroup module_fft
42  */
43
44 #include "gmxpre.h"
45
46 #include "gpu_3dfft.h"
47 #include "gpu_3dfft_impl.h"
48
49 #if GMX_GPU_CUDA
50 #    include "gpu_3dfft_cufft.h"
51 #elif GMX_GPU_OPENCL
52 #    include "gpu_3dfft_ocl.h"
53 #elif GMX_GPU_SYCL
54 #    include "gpu_3dfft_sycl.h"
55 #    if GMX_SYCL_DPCPP && GMX_FFT_MKL
56 #        include "gpu_3dfft_sycl_mkl.h"
57 #    endif
58 #    if GMX_SYCL_HIPSYCL && GMX_HIPSYCL_HAVE_HIP_TARGET
59 #        include "gpu_3dfft_sycl_rocfft.h"
60 #    endif
61 #endif
62
63 #if Heffte_FOUND
64 #    include "gpu_3dfft_heffte.h"
65 #endif
66
67 #include "gromacs/utility/arrayref.h"
68 #include "gromacs/utility/exceptions.h"
69
70 namespace gmx
71 {
72
73 // [[noreturn]] attributes must be added in the common headers, so it's easier to silence the warning here
74 #ifdef __clang__
75 #    pragma clang diagnostic push
76 #    pragma clang diagnostic ignored "-Wmissing-noreturn"
77 #endif
78
79 #if (GMX_GPU_CUDA || GMX_GPU_OPENCL || GMX_GPU_SYCL)
80
81 Gpu3dFft::Gpu3dFft(FftBackend           backend,
82                    bool                 allocateGrids,
83                    MPI_Comm             comm,
84                    ArrayRef<const int>  gridSizesInXForEachRank,
85                    ArrayRef<const int>  gridSizesInYForEachRank,
86                    const int            nz,
87                    bool                 performOutOfPlaceFFT,
88                    const DeviceContext& context,
89                    const DeviceStream&  pmeStream,
90                    ivec                 realGridSize,
91                    ivec                 realGridSizePadded,
92                    ivec                 complexGridSizePadded,
93                    DeviceBuffer<float>* realGrid,
94                    DeviceBuffer<float>* complexGrid)
95 {
96 #    if GMX_GPU_CUDA
97     switch (backend)
98     {
99         case FftBackend::Cufft:
100             impl_ = std::make_unique<Gpu3dFft::ImplCuFft>(allocateGrids,
101                                                           comm,
102                                                           gridSizesInXForEachRank,
103                                                           gridSizesInYForEachRank,
104                                                           nz,
105                                                           performOutOfPlaceFFT,
106                                                           context,
107                                                           pmeStream,
108                                                           realGridSize,
109                                                           realGridSizePadded,
110                                                           complexGridSizePadded,
111                                                           realGrid,
112                                                           complexGrid);
113             break;
114         default:
115             GMX_RELEASE_ASSERT(backend == FftBackend::HeFFTe_CUDA,
116                                "Unsupported FFT backend requested");
117     }
118 #    elif GMX_GPU_OPENCL
119     switch (backend)
120     {
121         case FftBackend::Ocl:
122             impl_ = std::make_unique<Gpu3dFft::ImplOcl>(allocateGrids,
123                                                         comm,
124                                                         gridSizesInXForEachRank,
125                                                         gridSizesInYForEachRank,
126                                                         nz,
127                                                         performOutOfPlaceFFT,
128                                                         context,
129                                                         pmeStream,
130                                                         realGridSize,
131                                                         realGridSizePadded,
132                                                         complexGridSizePadded,
133                                                         realGrid,
134                                                         complexGrid);
135             break;
136         default: GMX_THROW(InternalError("Unsupported FFT backend requested"));
137     }
138 #    elif GMX_GPU_SYCL
139     switch (backend)
140     {
141 #        if GMX_SYCL_DPCPP && GMX_FFT_MKL
142         case FftBackend::SyclMkl:
143             impl_ = std::make_unique<Gpu3dFft::ImplSyclMkl>(allocateGrids,
144                                                             comm,
145                                                             gridSizesInXForEachRank,
146                                                             gridSizesInYForEachRank,
147                                                             nz,
148                                                             performOutOfPlaceFFT,
149                                                             context,
150                                                             pmeStream,
151                                                             realGridSize,
152                                                             realGridSizePadded,
153                                                             complexGridSizePadded,
154                                                             realGrid,
155                                                             complexGrid);
156             break;
157 #        endif
158 #        if GMX_SYCL_HIPSYCL && GMX_HIPSYCL_HAVE_HIP_TARGET
159         case FftBackend::SyclRocfft:
160             impl_ = std::make_unique<Gpu3dFft::ImplSyclRocfft>(allocateGrids,
161                                                                comm,
162                                                                gridSizesInXForEachRank,
163                                                                gridSizesInYForEachRank,
164                                                                nz,
165                                                                performOutOfPlaceFFT,
166                                                                context,
167                                                                pmeStream,
168                                                                realGridSize,
169                                                                realGridSizePadded,
170                                                                complexGridSizePadded,
171                                                                realGrid,
172                                                                complexGrid);
173             break;
174 #        endif
175         case FftBackend::Sycl:
176             impl_ = std::make_unique<Gpu3dFft::ImplSycl>(allocateGrids,
177                                                          comm,
178                                                          gridSizesInXForEachRank,
179                                                          gridSizesInYForEachRank,
180                                                          nz,
181                                                          performOutOfPlaceFFT,
182                                                          context,
183                                                          pmeStream,
184                                                          realGridSize,
185                                                          realGridSizePadded,
186                                                          complexGridSizePadded,
187                                                          realGrid,
188                                                          complexGrid);
189             break;
190         default: GMX_THROW(InternalError("Unsupported FFT backend requested"));
191     }
192 #    endif
193
194 #    if Heffte_FOUND
195     switch (backend)
196     {
197         case FftBackend::HeFFTe_CUDA:
198             GMX_RELEASE_ASSERT(
199                     GMX_GPU_CUDA,
200                     "HeFFTe_CUDA FFT backend is supported only with GROMACS compiled with CUDA");
201             GMX_RELEASE_ASSERT(heffte::backend::is_enabled<heffte::backend::cufft>::value,
202                                "HeFFTe not compiled with CUDA support");
203             impl_ = std::make_unique<Gpu3dFft::ImplHeFfte<heffte::backend::cufft>>(
204                     allocateGrids,
205                     comm,
206                     gridSizesInXForEachRank,
207                     gridSizesInYForEachRank,
208                     nz,
209                     performOutOfPlaceFFT,
210                     context,
211                     pmeStream,
212                     realGridSize,
213                     realGridSizePadded,
214                     complexGridSizePadded,
215                     realGrid,
216                     complexGrid);
217
218             break;
219         default: GMX_RELEASE_ASSERT(impl_ != nullptr, "Unsupported FFT backend requested");
220     }
221 #    endif
222 }
223
224 #else
225
226 Gpu3dFft::Gpu3dFft(FftBackend /*backend */,
227                    bool /*allocateGrids*/,
228                    MPI_Comm /*comm*/,
229                    ArrayRef<const int> /*gridSizesInXForEachRank*/,
230                    ArrayRef<const int> /*gridSizesInYForEachRank*/,
231                    const int /*nz*/,
232                    bool /*performOutOfPlaceFFT*/,
233                    const DeviceContext& /*context*/,
234                    const DeviceStream& /*pmeStream*/,
235                    ivec /*realGridSize*/,
236                    ivec /*realGridSizePadded*/,
237                    ivec /*complexGridSizePadded*/,
238                    DeviceBuffer<float>* /*realGrid*/,
239                    DeviceBuffer<float>* /*complexGrid*/)
240 {
241     GMX_THROW(InternalError("Cannot run GPU routines in a CPU-only configuration"));
242 }
243
244 #endif
245
// Out-of-class defaulted destructor; releases impl_ through its own destructor.
// NOTE(review): presumably defined in this translation unit so that impl_ is
// destroyed where the implementation type from gpu_3dfft_impl.h is complete -
// confirm against the class declaration in gpu_3dfft.h.
Gpu3dFft::~Gpu3dFft() = default;
247
248 void Gpu3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent)
249 {
250     GMX_RELEASE_ASSERT(impl_ != nullptr, "Cannot run GPU routines in a CPU-only configuration");
251     impl_->perform3dFft(dir, timingEvent);
252 }
253
254 #ifdef __clang__
255 #    pragma clang diagnostic pop
256 #endif
257
258 } // namespace gmx