2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/fft/fft.h"
46 #include "gromacs/utility/exceptions.h"
47 #include "gromacs/utility/fatalerror.h"
48 #include "gromacs/utility/mutex.h"
51 # define FFTWPREFIX(name) fftw_##name
53 # define FFTWPREFIX(name) fftwf_##name
56 /* none of the fftw3 calls, except execute(), are thread-safe, so
57 we need to serialize them with this mutex. */
58 static gmx::Mutex big_fftw_mutex;
62 big_fftw_mutex.lock(); \
64 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
68 big_fftw_mutex.unlock(); \
70 GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
72 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
74 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
75 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
76 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
80 * Contents of the FFTW3 fft datatype.
82 * Note that this is one of several possible implementations of gmx_fft_t.
93 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
94 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
95 * first index: 0=unaligned, 1=aligned
96 * second index: 0=out-of-place, 1=in-place
97 * third index: 0=backward, 1=forward
99 FFTWPREFIX(plan) plan[2][2][2];
100 /** Used to catch user mistakes */
102 /** Number of dimensions in the FFT */
106 int gmx_fft_init_1d(gmx_fft_t* pfft, int nx, gmx_fft_flag flags)
108 return gmx_fft_init_many_1d(pfft, nx, 1, flags);
112 int gmx_fft_init_many_1d(gmx_fft_t* pfft, int nx, int howmany, gmx_fft_flag flags)
115 FFTWPREFIX(complex) * p1, *p2, *up1, *up2;
120 #if GMX_DISABLE_FFTW_MEASURE
121 flags |= GMX_FFT_FLAG_CONSERVATIVE;
124 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
128 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
134 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
140 /* allocate aligned, and extra memory to make it unaligned */
141 p1 = static_cast<FFTWPREFIX(complex)*>(
142 FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex)) * (nx + 2) * howmany));
145 FFTWPREFIX(free)(fft);
150 p2 = static_cast<FFTWPREFIX(complex)*>(
151 FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex)) * (nx + 2) * howmany));
154 FFTWPREFIX(free)(p1);
155 FFTWPREFIX(free)(fft);
160 /* make unaligned pointers.
161 * In double precision the actual complex datatype will be 16 bytes,
162 * so go to a char pointer and force an offset of 8 bytes instead.
164 pc = reinterpret_cast<char*>(p1);
166 up1 = reinterpret_cast<FFTWPREFIX(complex)*>(pc);
168 pc = reinterpret_cast<char*>(p2);
170 up2 = reinterpret_cast<FFTWPREFIX(complex)*>(pc);
172 /* int rank, const int *n, int howmany,
173 fftw_complex *in, const int *inembed,
174 int istride, int idist,
175 fftw_complex *out, const int *onembed,
176 int ostride, int odist,
177 int sign, unsigned flags */
178 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1,
179 nx, FFTW_BACKWARD, fftw_flags);
180 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1,
181 nx, FFTW_FORWARD, fftw_flags);
182 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1,
183 nx, FFTW_BACKWARD, fftw_flags);
184 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1,
185 nx, FFTW_FORWARD, fftw_flags);
186 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx,
187 FFTW_BACKWARD, fftw_flags);
188 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx,
189 FFTW_FORWARD, fftw_flags);
190 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx,
191 FFTW_BACKWARD, fftw_flags);
192 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx,
193 FFTW_FORWARD, fftw_flags);
195 for (i = 0; i < 2; i++)
197 for (j = 0; j < 2; j++)
199 for (k = 0; k < 2; k++)
201 if (fft->plan[i][j][k] == nullptr)
203 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
205 gmx_fft_destroy(fft);
207 FFTWPREFIX(free)(p1);
208 FFTWPREFIX(free)(p2);
216 FFTWPREFIX(free)(p1);
217 FFTWPREFIX(free)(p2);
219 fft->real_transform = 0;
227 int gmx_fft_init_1d_real(gmx_fft_t* pfft, int nx, gmx_fft_flag flags)
229 return gmx_fft_init_many_1d_real(pfft, nx, 1, flags);
232 int gmx_fft_init_many_1d_real(gmx_fft_t* pfft, int nx, int howmany, gmx_fft_flag flags)
235 real * p1, *p2, *up1, *up2;
240 #if GMX_DISABLE_FFTW_MEASURE
241 flags |= GMX_FFT_FLAG_CONSERVATIVE;
244 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
248 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
254 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
260 /* allocate aligned, and extra memory to make it unaligned */
261 p1 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx / 2 + 1) * 2 * howmany + 8));
264 FFTWPREFIX(free)(fft);
269 p2 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx / 2 + 1) * 2 * howmany + 8));
272 FFTWPREFIX(free)(p1);
273 FFTWPREFIX(free)(fft);
278 /* make unaligned pointers.
279 * In double precision the actual complex datatype will be 16 bytes,
280 * so go to a char pointer and force an offset of 8 bytes instead.
282 pc = reinterpret_cast<char*>(p1);
284 up1 = reinterpret_cast<real*>(pc);
286 pc = reinterpret_cast<char*>(p2);
288 up2 = reinterpret_cast<real*>(pc);
290 /* int rank, const int *n, int howmany,
291 double *in, const int *inembed,
292 int istride, int idist,
293 fftw_complex *out, const int *onembed,
294 int ostride, int odist,
296 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft_r2c)(
297 1, &nx, howmany, up1, nullptr, 1, (nx / 2 + 1) * 2,
298 reinterpret_cast<FFTWPREFIX(complex)*>(up2), nullptr, 1, (nx / 2 + 1), fftw_flags);
299 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft_r2c)(
300 1, &nx, howmany, up1, nullptr, 1, (nx / 2 + 1) * 2,
301 reinterpret_cast<FFTWPREFIX(complex)*>(up1), nullptr, 1, (nx / 2 + 1), fftw_flags);
302 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft_r2c)(
303 1, &nx, howmany, p1, nullptr, 1, (nx / 2 + 1) * 2,
304 reinterpret_cast<FFTWPREFIX(complex)*>(p2), nullptr, 1, (nx / 2 + 1), fftw_flags);
305 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft_r2c)(
306 1, &nx, howmany, p1, nullptr, 1, (nx / 2 + 1) * 2,
307 reinterpret_cast<FFTWPREFIX(complex)*>(p1), nullptr, 1, (nx / 2 + 1), fftw_flags);
309 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft_c2r)(
310 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(up1), nullptr, 1, (nx / 2 + 1),
311 up2, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
312 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft_c2r)(
313 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(up1), nullptr, 1, (nx / 2 + 1),
314 up1, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
315 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft_c2r)(
316 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(p1), nullptr, 1, (nx / 2 + 1),
317 p2, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
318 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft_c2r)(
319 1, &nx, howmany, reinterpret_cast<FFTWPREFIX(complex)*>(p1), nullptr, 1, (nx / 2 + 1),
320 p1, nullptr, 1, (nx / 2 + 1) * 2, fftw_flags);
322 for (i = 0; i < 2; i++)
324 for (j = 0; j < 2; j++)
326 for (k = 0; k < 2; k++)
328 if (fft->plan[i][j][k] == nullptr)
330 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
332 gmx_fft_destroy(fft);
334 FFTWPREFIX(free)(p1);
335 FFTWPREFIX(free)(p2);
343 FFTWPREFIX(free)(p1);
344 FFTWPREFIX(free)(p2);
346 fft->real_transform = 1;
355 int gmx_fft_init_2d_real(gmx_fft_t* pfft, int nx, int ny, gmx_fft_flag flags)
358 real * p1, *p2, *up1, *up2;
363 #if GMX_DISABLE_FFTW_MEASURE
364 flags |= GMX_FFT_FLAG_CONSERVATIVE;
367 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
371 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
377 if ((fft = static_cast<gmx_fft_t>(FFTWPREFIX(malloc)(sizeof(struct gmx_fft)))) == nullptr)
383 /* allocate aligned, and extra memory to make it unaligned */
384 p1 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx * (ny / 2 + 1) * 2 + 2)));
387 FFTWPREFIX(free)(fft);
392 p2 = static_cast<real*>(FFTWPREFIX(malloc)(sizeof(real) * (nx * (ny / 2 + 1) * 2 + 2)));
395 FFTWPREFIX(free)(p1);
396 FFTWPREFIX(free)(fft);
401 /* make unaligned pointers.
402 * In double precision the actual complex datatype will be 16 bytes,
403 * so go to a char pointer and force an offset of 8 bytes instead.
405 pc = reinterpret_cast<char*>(p1);
407 up1 = reinterpret_cast<real*>(pc);
409 pc = reinterpret_cast<char*>(p2);
411 up2 = reinterpret_cast<real*>(pc);
414 fft->plan[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(
415 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(up1), up2, fftw_flags);
416 fft->plan[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(
417 nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex)*>(up2), fftw_flags);
418 fft->plan[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(
419 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(up1), up1, fftw_flags);
420 fft->plan[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(
421 nx, ny, up1, reinterpret_cast<FFTWPREFIX(complex)*>(up1), fftw_flags);
423 fft->plan[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(
424 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(p1), p2, fftw_flags);
425 fft->plan[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(
426 nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex)*>(p2), fftw_flags);
427 fft->plan[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(
428 nx, ny, reinterpret_cast<FFTWPREFIX(complex)*>(p1), p1, fftw_flags);
429 fft->plan[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(
430 nx, ny, p1, reinterpret_cast<FFTWPREFIX(complex)*>(p1), fftw_flags);
433 for (i = 0; i < 2; i++)
435 for (j = 0; j < 2; j++)
437 for (k = 0; k < 2; k++)
439 if (fft->plan[i][j][k] == nullptr)
441 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
443 gmx_fft_destroy(fft);
445 FFTWPREFIX(free)(p1);
446 FFTWPREFIX(free)(p2);
454 FFTWPREFIX(free)(p1);
455 FFTWPREFIX(free)(p2);
457 fft->real_transform = 1;
465 int gmx_fft_1d(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
467 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
468 bool inplace = (in_data == out_data);
469 bool isforward = (dir == GMX_FFT_FORWARD);
472 if ((fft->real_transform == 1) || (fft->ndim != 1)
473 || ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)))
475 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
479 FFTWPREFIX(execute_dft)
480 (fft->plan[aligned][inplace][isforward], static_cast<FFTWPREFIX(complex)*>(in_data),
481 static_cast<FFTWPREFIX(complex)*>(out_data));
486 int gmx_fft_many_1d(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
488 return gmx_fft_1d(fft, dir, in_data, out_data);
491 int gmx_fft_1d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
493 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
494 bool inplace = (in_data == out_data);
495 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
498 if ((fft->real_transform != 1) || (fft->ndim != 1)
499 || ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)))
501 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
507 FFTWPREFIX(execute_dft_r2c)
508 (fft->plan[aligned][inplace][isforward], static_cast<real*>(in_data),
509 static_cast<FFTWPREFIX(complex)*>(out_data));
513 FFTWPREFIX(execute_dft_c2r)
514 (fft->plan[aligned][inplace][isforward], static_cast<FFTWPREFIX(complex)*>(in_data),
515 static_cast<real*>(out_data));
521 int gmx_fft_many_1d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
523 return gmx_fft_1d_real(fft, dir, in_data, out_data);
526 int gmx_fft_2d_real(gmx_fft_t fft, enum gmx_fft_direction dir, void* in_data, void* out_data)
528 bool aligned = (((size_t(in_data) | size_t(out_data)) & 0xf) == 0);
529 bool inplace = (in_data == out_data);
530 bool isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
533 if ((fft->real_transform != 1) || (fft->ndim != 2)
534 || ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)))
536 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
542 FFTWPREFIX(execute_dft_r2c)
543 (fft->plan[aligned][inplace][isforward], static_cast<real*>(in_data),
544 static_cast<FFTWPREFIX(complex)*>(out_data));
548 FFTWPREFIX(execute_dft_c2r)
549 (fft->plan[aligned][inplace][isforward], static_cast<FFTWPREFIX(complex)*>(in_data),
550 static_cast<real*>(out_data));
557 void gmx_fft_destroy(gmx_fft_t fft)
563 for (i = 0; i < 2; i++)
565 for (j = 0; j < 2; j++)
567 for (k = 0; k < 2; k++)
569 if (fft->plan[i][j][k] != nullptr)
572 FFTWPREFIX(destroy_plan)(fft->plan[i][j][k]);
574 fft->plan[i][j][k] = nullptr;
580 FFTWPREFIX(free)(fft);
585 void gmx_many_fft_destroy(gmx_fft_t fft)
587 gmx_fft_destroy(fft);
590 void gmx_fft_cleanup()
592 FFTWPREFIX(cleanup)();