Test gmxapi and clients through Py 3.9.
[alexxy/gromacs.git] / src / gromacs / ewald / pme_gpu_3dfft.cu
1 /*
2  * This file is part of the GROMACS molecular simulation package.
3  *
4  * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by
5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6  * and including many others, as listed in the AUTHORS file in the
7  * top-level source directory and at http://www.gromacs.org.
8  *
9  * GROMACS is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public License
11  * as published by the Free Software Foundation; either version 2.1
12  * of the License, or (at your option) any later version.
13  *
14  * GROMACS is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with GROMACS; if not, see
21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
23  *
24  * If you want to redistribute modifications to GROMACS, please
25  * consider that scientific software is very special. Version
26  * control is crucial - bugs must be traceable. We will be happy to
27  * consider code for inclusion in the official distribution, but
28  * derived work must not be called official GROMACS. Details are found
29  * in the README & COPYING files - if they are missing, get the
30  * official version at http://www.gromacs.org.
31  *
32  * To help us fund GROMACS development, we humbly ask that you cite
33  * the research papers on the package. Check out http://www.gromacs.org.
34  */
35
36 /*! \internal \file
37  *  \brief Implements CUDA FFT routines for PME GPU.
38  *
39  *  \author Aleksei Iupinov <a.yupinov@gmail.com>
40  *  \ingroup module_ewald
41  */
42
43 #include "gmxpre.h"
44
45 #include "pme_gpu_3dfft.h"
46
47 #include <cufft.h>
48
49 #include "gromacs/gpu_utils/device_stream.h"
50 #include "gromacs/utility/fatalerror.h"
51 #include "gromacs/utility/gmxassert.h"
52
53 class GpuParallel3dFft::Impl
54 {
55 public:
56     Impl(ivec                 realGridSize,
57          ivec                 realGridSizePadded,
58          ivec                 complexGridSizePadded,
59          bool                 useDecomposition,
60          bool                 performOutOfPlaceFFT,
61          const DeviceContext& context,
62          const DeviceStream&  pmeStream,
63          DeviceBuffer<float>  realGrid,
64          DeviceBuffer<float>  complexGrid);
65     ~Impl();
66
67     cufftHandle   planR2C_;
68     cufftHandle   planC2R_;
69     cufftReal*    realGrid_;
70     cufftComplex* complexGrid_;
71 };
72
73 static void handleCufftError(cufftResult_t status, const char* msg)
74 {
75     if (status != CUFFT_SUCCESS)
76     {
77         gmx_fatal(FARGS, "%s (error code %d)\n", msg, status);
78     }
79 }
80
81 GpuParallel3dFft::Impl::Impl(ivec       realGridSize,
82                              ivec       realGridSizePadded,
83                              ivec       complexGridSizePadded,
84                              const bool useDecomposition,
85                              const bool /*performOutOfPlaceFFT*/,
86                              const DeviceContext& /*context*/,
87                              const DeviceStream& pmeStream,
88                              DeviceBuffer<float> realGrid,
89                              DeviceBuffer<float> complexGrid) :
90     realGrid_(reinterpret_cast<cufftReal*>(realGrid)),
91     complexGrid_(reinterpret_cast<cufftComplex*>(complexGrid))
92 {
93     GMX_RELEASE_ASSERT(!useDecomposition, "FFT decomposition not implemented");
94
95     const int complexGridSizePaddedTotal =
96             complexGridSizePadded[XX] * complexGridSizePadded[YY] * complexGridSizePadded[ZZ];
97     const int realGridSizePaddedTotal =
98             realGridSizePadded[XX] * realGridSizePadded[YY] * realGridSizePadded[ZZ];
99
100     GMX_RELEASE_ASSERT(realGrid_, "Bad (null) input real-space grid");
101     GMX_RELEASE_ASSERT(complexGrid_, "Bad (null) input complex grid");
102
103     cufftResult_t result;
104     /* Commented code for a simple 3D grid with no padding */
105     /*
106        result = cufftPlan3d(&planR2C_, realGridSize[XX], realGridSize[YY], realGridSize[ZZ],
107        CUFFT_R2C); handleCufftError(result, "cufftPlan3d R2C plan failure");
108
109        result = cufftPlan3d(&planC2R_, realGridSize[XX], realGridSize[YY], realGridSize[ZZ],
110        CUFFT_C2R); handleCufftError(result, "cufftPlan3d C2R plan failure");
111      */
112
113     const int rank = 3, batch = 1;
114     result = cufftPlanMany(&planR2C_,
115                            rank,
116                            realGridSize,
117                            realGridSizePadded,
118                            1,
119                            realGridSizePaddedTotal,
120                            complexGridSizePadded,
121                            1,
122                            complexGridSizePaddedTotal,
123                            CUFFT_R2C,
124                            batch);
125     handleCufftError(result, "cufftPlanMany R2C plan failure");
126
127     result = cufftPlanMany(&planC2R_,
128                            rank,
129                            realGridSize,
130                            complexGridSizePadded,
131                            1,
132                            complexGridSizePaddedTotal,
133                            realGridSizePadded,
134                            1,
135                            realGridSizePaddedTotal,
136                            CUFFT_C2R,
137                            batch);
138     handleCufftError(result, "cufftPlanMany C2R plan failure");
139
140     cudaStream_t stream = pmeStream.stream();
141     GMX_RELEASE_ASSERT(stream, "Can not use the default CUDA stream for PME cuFFT");
142
143     result = cufftSetStream(planR2C_, stream);
144     handleCufftError(result, "cufftSetStream R2C failure");
145
146     result = cufftSetStream(planC2R_, stream);
147     handleCufftError(result, "cufftSetStream C2R failure");
148 }
149
150 GpuParallel3dFft::Impl::~Impl()
151 {
152     cufftResult_t result;
153     result = cufftDestroy(planR2C_);
154     handleCufftError(result, "cufftDestroy R2C failure");
155     result = cufftDestroy(planC2R_);
156     handleCufftError(result, "cufftDestroy C2R failure");
157 }
158
159 void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timingEvent*/)
160 {
161     cufftResult_t result;
162     if (dir == GMX_FFT_REAL_TO_COMPLEX)
163     {
164         result = cufftExecR2C(impl_->planR2C_, impl_->realGrid_, impl_->complexGrid_);
165         handleCufftError(result, "cuFFT R2C execution failure");
166     }
167     else
168     {
169         result = cufftExecC2R(impl_->planC2R_, impl_->complexGrid_, impl_->realGrid_);
170         handleCufftError(result, "cuFFT C2R execution failure");
171     }
172 }
173
174 GpuParallel3dFft::GpuParallel3dFft(ivec                 realGridSize,
175                                    ivec                 realGridSizePadded,
176                                    ivec                 complexGridSizePadded,
177                                    const bool           useDecomposition,
178                                    const bool           performOutOfPlaceFFT,
179                                    const DeviceContext& context,
180                                    const DeviceStream&  pmeStream,
181                                    DeviceBuffer<float>  realGrid,
182                                    DeviceBuffer<float>  complexGrid) :
183     impl_(std::make_unique<Impl>(realGridSize,
184                                  realGridSizePadded,
185                                  complexGridSizePadded,
186                                  useDecomposition,
187                                  performOutOfPlaceFFT,
188                                  context,
189                                  pmeStream,
190                                  realGrid,
191                                  complexGrid))
192 {
193 }
194
195 GpuParallel3dFft::~GpuParallel3dFft() = default;