2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/fft/fft.h"
49 #include "gromacs/utility/exceptions.h"
50 #include "gromacs/utility/fatalerror.h"
53 # define FFTWPREFIX(name) fftw_##name
55 # define FFTWPREFIX(name) fftwf_##name
58 /* none of the fftw3 calls, except execute(), are thread-safe, so
59 we need to serialize them with this mutex. */
60 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
61 static std::mutex big_fftw_mutex;
65 big_fftw_mutex.lock(); \
67 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
71 big_fftw_mutex.unlock(); \
73 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
75 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
77 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
78 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
79 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
83 * Contents of the FFTW3 fft datatype.
85 * Note that this is one of several possible implementations of gmx_fft_t.
96 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
97 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
98 * first index: 0=unaligned, 1=aligned
99 * second index: 0=out-of-place, 1=in-place
100 * third index: 0=backward, 1=forward
102 FFTWPREFIX(plan) plan[2][2][2];
103 /** Used to catch user mistakes */
105 /** Number of dimensions in the FFT */
109 int gmx_fft_init_1d(gmx_fft_t* pfft, int nx, gmx_fft_flag flags)
111 return gmx_fft_init_many_1d(pfft, nx, 1, flags);
115 int gmx_fft_init_many_1d(gmx_fft_t* pfft, int nx, int howmany, gmx_fft_flag flags)
118 FFTWPREFIX(complex) * p1, *p2, *up1, *up2;
123 #if GMX_DISABLE_FFTW_MEASURE
124 flags |= GMX_FFT_FLAG_CONSERVATIVE;
127 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
131 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
137 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
143 /* allocate aligned, and extra memory to make it unaligned */
144 p1 = static_cast<FFTWPREFIX(complex)*>(
145 FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex)) * (nx + 2) * howmany));
148 FFTWPREFIX(free)(fft);
153 p2 = static_cast<FFTWPREFIX(complex)*>(
154 FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex)) * (nx + 2) * howmany));
157 FFTWPREFIX(free)(p1);
158 FFTWPREFIX(free)(fft);
163 /* make unaligned pointers.
164 * In double precision the actual complex datatype will be 16 bytes,
165 * so go to a char pointer and force an offset of 8 bytes instead.
167 pc = reinterpret_cast<char*>(p1);
169 up1 = reinterpret_cast<FFTWPREFIX(complex)*>(pc);
171 pc = reinterpret_cast<char*>(p2);
173 up2 = reinterpret_cast<FFTWPREFIX(complex)*>(pc);
175 /* int rank, const int *n, int howmany,
176 fftw_complex *in, const int *inembed,
177 int istride, int idist,
178 fftw_complex *out, const int *onembed,
179 int ostride, int odist,
180 int sign, unsigned flags */
181 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft)(
182 1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
183 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft)(
184 1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
185 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft)(
186 1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
187 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft)(
188 1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
189 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft)(
190 1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
191 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft)(
192 1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
193 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft)(
194 1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
195 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft)(
196 1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
198 for (i = 0; i < 2; i++)
200 for (j = 0; j < 2; j++)
202 for (k = 0; k < 2; k++)
204 if (fft->plan[i][j][k] == nullptr)
206 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
208 gmx_fft_destroy(fft);
210 FFTWPREFIX(free)(p1);
211 FFTWPREFIX(free)(p2);
219 FFTWPREFIX(free)(p1);
220 FFTWPREFIX(free)(p2);
222 fft->real_transform = 0;
230 int gmx_fft_init_1d_real(gmx_fft_t* pfft, int nx, gmx_fft_flag flags)
232 return gmx_fft_init_many_1d_real(pfft, nx, 1, flags);
235 int gmx_fft_init_many_1d_real(gmx_fft_t* pfft, int nx, int howmany, gmx_fft_flag flags)
238 real * p1, *p2, *up1, *up2;
243 #if GMX_DISABLE_FFTW_MEASURE
244 flags |= GMX_FFT_FLAG_CONSERVATIVE;
247 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
251 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
257 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
263 /* allocate aligned, and extra memory to make it unaligned */
264 p1 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx / 2 + 1) * 2 * howmany + 8));
267 FFTWPREFIX(free)(fft);
272 p2 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx / 2 + 1) * 2 * howmany + 8));
275 FFTWPREFIX(free)(p1);
276 FFTWPREFIX(free)(fft);
281 /* make unaligned pointers.
282 * In double precision the actual complex datatype will be 16 bytes,
283 * so go to a char pointer and force an offset of 8 bytes instead.
285 pc = reinterpret_cast<char*>(p1);
287 up1 = reinterpret_cast<real*>(pc);
289 pc = reinterpret_cast<char*>(p2);
291 up2 = reinterpret_cast<real*>(pc);
293 /* int rank, const int *n, int howmany,
294 double *in, const int *inembed,
295 int istride, int idist,
296 fftw_complex *out, const int *onembed,
297 int ostride, int odist,
299 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1,
306 reinterpret_cast<FFTWPREFIX(complex)*>(up2),
311 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1,
318 reinterpret_cast<FFTWPREFIX(complex)*>(up1),
323 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1,
330 reinterpret_cast<FFTWPREFIX(complex)*>(p2),
335 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1,
342 reinterpret_cast<FFTWPREFIX(complex)*>(p1),
348 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1,
351 reinterpret_cast<FFTWPREFIX(complex)*>(up1),
360 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1,
363 reinterpret_cast<FFTWPREFIX(complex)*>(up1),
372 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1,
375 reinterpret_cast<FFTWPREFIX(complex)*>(p1),
384 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1,
387 reinterpret_cast<FFTWPREFIX(complex)*>(p1),
397 for (i = 0; i < 2; i++)
399 for (j = 0; j < 2; j++)
401 for (k = 0; k < 2; k++)
403 if (fft->plan[i][j][k] == nullptr)
405 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
407 gmx_fft_destroy(fft);
409 FFTWPREFIX(free)(p1);
410 FFTWPREFIX(free)(p2);
418 FFTWPREFIX(free)(p1);
419 FFTWPREFIX(free)(p2);
421 fft->real_transform = 1;
430 int gmx_fft_init_2d_real(gmx_fft_t* pfft, int nx, int ny, gmx_fft_flag flags)
433 real * p1, *p2, *up1, *up2;
438 #if GMX_DISABLE_FFTW_MEASURE
439 flags |= GMX_FFT_FLAG_CONSERVATIVE;
442 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
446 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
452 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
458 /* allocate aligned, and extra memory to make it unaligned */
459 p1 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx * (ny / 2 + 1) * 2 + 2)));
462 FFTWPREFIX(free)(fft);
467 p2 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx * (ny / 2 + 1) * 2 + 2)));
470 FFTWPREFIX(free)(p1);
471 FFTWPREFIX(free)(fft);
476 /* make unaligned pointers.
477 * In double precision the actual complex datatype will be 16 bytes,
478 * so go to a char pointer and force an offset of 8 bytes instead.
480 pc = reinterpret_cast<char*>(p1);
482 up1 = reinterpret_cast<real*>(pc);
484 pc = reinterpret_cast<char*>(p2);
486 up2 = reinterpret_cast<real*>(pc);
489 fft->plan[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(
490 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(up1), up2, fftw_flags);
491 fft->plan[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(
492 nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex)*>(up2), fftw_flags);
493 fft->plan[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(
494 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(up1), up1, fftw_flags);
495 fft->plan[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(
496 nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex)*>(up1), fftw_flags);
498 fft->plan[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(
499 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(p1), p2, fftw_flags);
500 fft->plan[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(
501 nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex)*>(p2), fftw_flags);
502 fft->plan[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(
503 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(p1), p1, fftw_flags);
504 fft->plan[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(
505 nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex)*>(p1), fftw_flags);
508 for (i = 0; i < 2; i++)
510 for (j = 0; j < 2; j++)
512 for (k = 0; k < 2; k++)
514 if (fft->plan[i][j][k] == nullptr)
516 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
518 gmx_fft_destroy(fft);
520 FFTWPREFIX(free)(p1);
521 FFTWPREFIX(free)(p2);
529 FFTWPREFIX(free)(p1);
530 FFTWPREFIX(free)(p2);
532 fft->real_transform = 1;
540 int gmx_fft_1d(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
542 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
543 bool inplace = (in_data == out_data);
544 bool isforward = (dir == GMX_FFT_FORWARD);
547 if ((fft->real_transform == 1) || (fft->ndim != 1)
548 || ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)))
550 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
554 FFTWPREFIX(execute_dft)
555 (fft->plan[aligned][inplace][isforward],
556 static_cast<FFTWPREFIX(complex)*>(in_data),
557 static_cast<FFTWPREFIX(complex)*>(out_data));
562 int gmx_fft_many_1d(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
564 return gmx_fft_1d(fft, dir, in_data, out_data);
567 int gmx_fft_1d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
569 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
570 bool inplace = (in_data == out_data);
571 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
574 if ((fft->real_transform != 1) || (fft->ndim != 1)
575 || ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)))
577 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
583 FFTWPREFIX(execute_dft_r2c)
584 (fft->plan[aligned][inplace][isforward],
585 static_cast<real*>(in_data),
586 static_cast<FFTWPREFIX(complex)*>(out_data));
590 FFTWPREFIX(execute_dft_c2r)
591 (fft->plan[aligned][inplace][isforward],
592 static_cast<FFTWPREFIX(complex)*>(in_data),
593 static_cast<real*>(out_data));
599 int gmx_fft_many_1d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
601 return gmx_fft_1d_real(fft, dir, in_data, out_data);
604 int gmx_fft_2d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
606 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
607 bool inplace = (in_data == out_data);
608 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
611 if ((fft->real_transform != 1) || (fft->ndim != 2)
612 || ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)))
614 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
620 FFTWPREFIX(execute_dft_r2c)
621 (fft->plan[aligned][inplace][isforward],
622 static_cast<real*>(in_data),
623 static_cast<FFTWPREFIX(complex)*>(out_data));
627 FFTWPREFIX(execute_dft_c2r)
628 (fft->plan[aligned][inplace][isforward],
629 static_cast<FFTWPREFIX(complex)*>(in_data),
630 static_cast<real*>(out_data));
637 void gmx_fft_destroy(gmx_fft_t fft)
643 for (i = 0; i < 2; i++)
645 for (j = 0; j < 2; j++)
647 for (k = 0; k < 2; k++)
649 if (fft->plan[i][j][k] != nullptr)
652 FFTWPREFIX(destroy_plan)(fft->plan[i][j][k]);
654 fft->plan[i][j][k] = nullptr;
660 FFTWPREFIX(free)(fft);
665 void gmx_many_fft_destroy(gmx_fft_t fft)
667 gmx_fft_destroy(fft);
670 void gmx_fft_cleanup()
672 FFTWPREFIX(cleanup)();