2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Implements GPU 3D FFT routines for SYCL.
39 * \author Andrey Alekseenko <al42and@gmail.com>
40 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * In DPC++, we use Intel oneMKL to perform the FFT. It requires using the binary version of
44 * MKL, since the open-source one does not support FFT yet (https://github.com/oneapi-src/oneMKL/issues/27).
46 * There are issues with out-of-place transform, existing at least in oneAPI 2021.2-2021.4, so
47 * we allow only in-place transforms.
52 #include "gpu_3dfft_sycl_mkl.h"
56 #include "gromacs/gpu_utils/devicebuffer_sycl.h"
57 #include "gromacs/gpu_utils/device_stream.h"
58 #include "gromacs/utility/exceptions.h"
59 #include "gromacs/utility/gmxassert.h"
64 # error This file can only be compiled with Intel DPC++ compiler
68 # error Must use MKL library for FFT when compiling with Intel DPC++ compiler
72 #include <oneapi/mkl/dfti.hpp>
73 #include <oneapi/mkl/exceptions.hpp>
74 #include <mkl_version.h>
76 // oneAPI 2021.2.0 to 2021.4.0 have issues with backward out-of-place transform
77 static constexpr bool sc_mklHasBuggyOutOfPlaceFFT = (INTEL_MKL_VERSION <= 20210004);
82 Gpu3dFft::ImplSyclMkl::Descriptor Gpu3dFft::ImplSyclMkl::initDescriptor(const ivec realGridSize)
86 const std::vector<std::int64_t> realGridDimensions{ realGridSize[XX],
89 return { realGridDimensions };
91 catch (oneapi::mkl::exception& exc)
93 GMX_THROW(InternalError(formatString("MKL failure while constructing descriptor: %s", exc.what())));
97 Gpu3dFft::ImplSyclMkl::ImplSyclMkl(bool allocateGrids,
99 ArrayRef<const int> gridSizesInXForEachRank,
100 ArrayRef<const int> gridSizesInYForEachRank,
102 const bool performOutOfPlaceFFT,
103 const DeviceContext& /*context*/,
104 const DeviceStream& pmeStream,
106 ivec realGridSizePadded,
107 ivec complexGridSizePadded,
108 DeviceBuffer<float>* realGrid,
109 DeviceBuffer<float>* complexGrid) :
110 realGrid_(*realGrid->buffer_),
111 complexGrid_(*complexGrid->buffer_),
112 queue_(pmeStream.stream()),
113 r2cDescriptor_(initDescriptor(realGridSize)),
114 c2rDescriptor_(initDescriptor(realGridSize))
116 GMX_RELEASE_ASSERT(!allocateGrids, "Grids needs to be pre-allocated");
117 GMX_RELEASE_ASSERT(gridSizesInXForEachRank.size() == 1 && gridSizesInYForEachRank.size() == 1,
118 "Multi-rank FFT decomposition not implemented with SYCL MKL backend");
120 GMX_RELEASE_ASSERT(!(sc_mklHasBuggyOutOfPlaceFFT && performOutOfPlaceFFT),
121 "The version of MKL used does not properly support out-of-place FFTs");
123 GMX_ASSERT(checkDeviceBuffer(*realGrid,
124 realGridSizePadded[XX] * realGridSizePadded[YY] * realGridSizePadded[ZZ]),
125 "Real grid buffer is too small for the declared padded size");
127 GMX_ASSERT(checkDeviceBuffer(*complexGrid,
128 complexGridSizePadded[XX] * complexGridSizePadded[YY]
129 * complexGridSizePadded[ZZ] * 2),
130 "Complex grid buffer is too small for the declared padded size");
132 // MKL expects row-major
133 const std::array<MKL_LONG, 4> realGridStrides = {
134 0, static_cast<MKL_LONG>(realGridSizePadded[YY] * realGridSizePadded[ZZ]), realGridSizePadded[ZZ], 1
136 const std::array<MKL_LONG, 4> complexGridStrides = {
138 static_cast<MKL_LONG>(complexGridSizePadded[YY] * complexGridSizePadded[ZZ]),
139 complexGridSizePadded[ZZ],
143 const auto placement = performOutOfPlaceFFT ? DFTI_NOT_INPLACE : DFTI_INPLACE;
147 using oneapi::mkl::dft::config_param;
148 r2cDescriptor_.set_value(config_param::INPUT_STRIDES, realGridStrides.data());
149 r2cDescriptor_.set_value(config_param::OUTPUT_STRIDES, complexGridStrides.data());
150 r2cDescriptor_.set_value(config_param::CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
151 r2cDescriptor_.set_value(config_param::PLACEMENT, placement);
152 r2cDescriptor_.commit(queue_);
154 catch (oneapi::mkl::exception& exc)
156 GMX_THROW(InternalError(
157 formatString("MKL failure while configuring R2C descriptor: %s", exc.what())));
162 using oneapi::mkl::dft::config_param;
163 c2rDescriptor_.set_value(config_param::INPUT_STRIDES, complexGridStrides.data());
164 c2rDescriptor_.set_value(config_param::OUTPUT_STRIDES, realGridStrides.data());
165 c2rDescriptor_.set_value(config_param::CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX);
166 c2rDescriptor_.set_value(config_param::PLACEMENT, placement);
167 c2rDescriptor_.commit(queue_);
169 catch (oneapi::mkl::exception& exc)
171 GMX_THROW(InternalError(
172 formatString("MKL failure while configuring C2R descriptor: %s", exc.what())));
176 Gpu3dFft::ImplSyclMkl::~ImplSyclMkl() = default;
178 void Gpu3dFft::ImplSyclMkl::perform3dFft(gmx_fft_direction dir, CommandEvent* /*timingEvent*/)
182 case GMX_FFT_REAL_TO_COMPLEX:
185 oneapi::mkl::dft::compute_forward(r2cDescriptor_, realGrid_, complexGrid_);
187 catch (oneapi::mkl::exception& exc)
189 GMX_THROW(InternalError(
190 formatString("MKL failure while executing R2C transform: %s", exc.what())));
193 case GMX_FFT_COMPLEX_TO_REAL:
196 oneapi::mkl::dft::compute_backward(c2rDescriptor_, complexGrid_, realGrid_);
198 catch (oneapi::mkl::exception& exc)
200 GMX_THROW(InternalError(
201 formatString("MKL failure while executing C2R transform: %s", exc.what())));
205 GMX_THROW(NotImplementedError("The chosen 3D-FFT case is not implemented on GPUs"));