Add hipSYCL support to GPU 3DFFT
[alexxy/gromacs.git] / src / gromacs / fft / gpu_3dfft.cpp
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  *  \brief Implements the GPU 3D FFT wrapper: backend selection for GPU builds and stubs for CPU-only builds
38  *
39  *  \author Mark Abraham <mark.j.abraham@gmail.com>
40  *  \author Gaurav Garg <gaugarg@nvidia.com>
41  *  \ingroup module_fft
42  */
43
44 #include "gmxpre.h"
45
46 #include "gpu_3dfft.h"
47 #include "gpu_3dfft_impl.h"
48
49 #if GMX_GPU_CUDA
50 #    include "gpu_3dfft_cufft.h"
51 #elif GMX_GPU_OPENCL
52 #    include "gpu_3dfft_ocl.h"
53 #elif GMX_GPU_SYCL
54 #    include "gpu_3dfft_sycl.h"
55 #    if GMX_SYCL_HIPSYCL
56 #        include "gpu_3dfft_sycl_rocfft.h"
57 #    endif
58 #endif
59
60 #if Heffte_FOUND
61 #    include "gpu_3dfft_heffte.h"
62 #endif
63
64 #include "gromacs/utility/arrayref.h"
65 #include "gromacs/utility/exceptions.h"
66
67 namespace gmx
68 {
69
70 // [[noreturn]] attributes must be added in the common headers, so it's easier to silence the warning here
71 #ifdef __clang__
72 #    pragma clang diagnostic push
73 #    pragma clang diagnostic ignored "-Wmissing-noreturn"
74 #endif
75
76 #if (GMX_GPU_CUDA || GMX_GPU_OPENCL || GMX_GPU_SYCL)
77
78 Gpu3dFft::Gpu3dFft(FftBackend           backend,
79                    bool                 allocateGrids,
80                    MPI_Comm             comm,
81                    ArrayRef<const int>  gridSizesInXForEachRank,
82                    ArrayRef<const int>  gridSizesInYForEachRank,
83                    const int            nz,
84                    bool                 performOutOfPlaceFFT,
85                    const DeviceContext& context,
86                    const DeviceStream&  pmeStream,
87                    ivec                 realGridSize,
88                    ivec                 realGridSizePadded,
89                    ivec                 complexGridSizePadded,
90                    DeviceBuffer<float>* realGrid,
91                    DeviceBuffer<float>* complexGrid)
92 {
93 #    if GMX_GPU_CUDA
94     switch (backend)
95     {
96         case FftBackend::Cufft:
97             impl_ = std::make_unique<Gpu3dFft::ImplCuFft>(allocateGrids,
98                                                           comm,
99                                                           gridSizesInXForEachRank,
100                                                           gridSizesInYForEachRank,
101                                                           nz,
102                                                           performOutOfPlaceFFT,
103                                                           context,
104                                                           pmeStream,
105                                                           realGridSize,
106                                                           realGridSizePadded,
107                                                           complexGridSizePadded,
108                                                           realGrid,
109                                                           complexGrid);
110             break;
111         default:
112             GMX_RELEASE_ASSERT(backend == FftBackend::HeFFTe_CUDA,
113                                "Unsupported FFT backend requested");
114     }
115 #    elif GMX_GPU_OPENCL
116     switch (backend)
117     {
118         case FftBackend::Ocl:
119             impl_ = std::make_unique<Gpu3dFft::ImplOcl>(allocateGrids,
120                                                         comm,
121                                                         gridSizesInXForEachRank,
122                                                         gridSizesInYForEachRank,
123                                                         nz,
124                                                         performOutOfPlaceFFT,
125                                                         context,
126                                                         pmeStream,
127                                                         realGridSize,
128                                                         realGridSizePadded,
129                                                         complexGridSizePadded,
130                                                         realGrid,
131                                                         complexGrid);
132             break;
133         default: GMX_THROW(InternalError("Unsupported FFT backend requested"));
134     }
135 #    elif GMX_GPU_SYCL
136     switch (backend)
137     {
138 #        if GMX_SYCL_HIPSYCL
139         case FftBackend::SyclRocfft:
140             impl_ = std::make_unique<Gpu3dFft::ImplSyclRocfft>(allocateGrids,
141                                                                comm,
142                                                                gridSizesInXForEachRank,
143                                                                gridSizesInYForEachRank,
144                                                                nz,
145                                                                performOutOfPlaceFFT,
146                                                                context,
147                                                                pmeStream,
148                                                                realGridSize,
149                                                                realGridSizePadded,
150                                                                complexGridSizePadded,
151                                                                realGrid,
152                                                                complexGrid);
153             break;
154 #        endif
155         case FftBackend::Sycl:
156             impl_ = std::make_unique<Gpu3dFft::ImplSycl>(allocateGrids,
157                                                          comm,
158                                                          gridSizesInXForEachRank,
159                                                          gridSizesInYForEachRank,
160                                                          nz,
161                                                          performOutOfPlaceFFT,
162                                                          context,
163                                                          pmeStream,
164                                                          realGridSize,
165                                                          realGridSizePadded,
166                                                          complexGridSizePadded,
167                                                          realGrid,
168                                                          complexGrid);
169             break;
170         default: GMX_THROW(InternalError("Unsupported FFT backend requested"));
171     }
172 #    endif
173
174 #    if Heffte_FOUND
175     switch (backend)
176     {
177         case FftBackend::HeFFTe_CUDA:
178             GMX_RELEASE_ASSERT(
179                     GMX_GPU_CUDA,
180                     "HeFFTe_CUDA FFT backend is supported only with GROMACS compiled with CUDA");
181             GMX_RELEASE_ASSERT(heffte::backend::is_enabled<heffte::backend::cufft>::value,
182                                "HeFFTe not compiled with CUDA support");
183             impl_ = std::make_unique<Gpu3dFft::ImplHeFfte<heffte::backend::cufft>>(
184                     allocateGrids,
185                     comm,
186                     gridSizesInXForEachRank,
187                     gridSizesInYForEachRank,
188                     nz,
189                     performOutOfPlaceFFT,
190                     context,
191                     pmeStream,
192                     realGridSize,
193                     realGridSizePadded,
194                     complexGridSizePadded,
195                     realGrid,
196                     complexGrid);
197
198             break;
199         default: GMX_RELEASE_ASSERT(impl_ != nullptr, "Unsupported FFT backend requested");
200     }
201 #    endif
202 }
203
204 #else
205
206 Gpu3dFft::Gpu3dFft(FftBackend /*backend */,
207                    bool /*allocateGrids*/,
208                    MPI_Comm /*comm*/,
209                    ArrayRef<const int> /*gridSizesInXForEachRank*/,
210                    ArrayRef<const int> /*gridSizesInYForEachRank*/,
211                    const int /*nz*/,
212                    bool /*performOutOfPlaceFFT*/,
213                    const DeviceContext& /*context*/,
214                    const DeviceStream& /*pmeStream*/,
215                    ivec /*realGridSize*/,
216                    ivec /*realGridSizePadded*/,
217                    ivec /*complexGridSizePadded*/,
218                    DeviceBuffer<float>* /*realGrid*/,
219                    DeviceBuffer<float>* /*complexGrid*/)
220 {
221     GMX_THROW(InternalError("Cannot run GPU routines in a CPU-only configuration"));
222 }
223
224 #endif
225
// Compiler-generated destructor, defined in this source file rather than in
// the class declaration.
// NOTE(review): presumably kept out of line so that impl_ is destroyed where
// the backend implementation types are complete - confirm against gpu_3dfft.h.
Gpu3dFft::~Gpu3dFft() = default;
227
/* Runs a 3D FFT by delegating to the backend implementation held in impl_.
 *
 * Both \p dir and \p timingEvent are passed through unchanged to the
 * implementation's own perform3dFft().
 */
void Gpu3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* timingEvent)
{
    // Stop with a diagnostic instead of dereferencing an empty implementation pointer.
    GMX_RELEASE_ASSERT(impl_ != nullptr, "Cannot run GPU routines in a CPU-only configuration");
    impl_->perform3dFft(dir, timingEvent);
}
233
234 #ifdef __clang__
235 #    pragma clang diagnostic pop
236 #endif
237
238 } // namespace gmx