2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/fft/fft.h"
46 #include "gromacs/utility/fatalerror.h"
49 #define FFTWPREFIX(name) fftw_ ## name
51 #define FFTWPREFIX(name) fftwf_ ## name
54 #include "thread_mpi/mutex.h"
55 #include "gromacs/utility/exceptions.h"
57 /* none of the fftw3 calls, except execute(), are thread-safe, so
58 we need to serialize them with this mutex. */
59 static tMPI::mutex big_fftw_mutex;
60 #define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
61 #define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
63 /* We assume here that aligned memory starts at a multiple of 16 bytes and unaligned memory starts at a multiple of 8 bytes. The latter is guaranteed for all malloc implementations.
65 - These FFT plans must not be used on memory whose starting address is not a multiple of 8 bytes.
66 This is OK as long as the memory comes directly from malloc and is not some subarray within allocated memory.
67 - This has to be fixed if any future architecture requires memory to be aligned to multiples of 32 bytes.
71 * Contents of the FFTW3 fft datatype.
73 * Note that this is one of several possible implementations of gmx_fft_t.
84 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
85 * result in 8 different FFTW plans. Keep track of them with 3 array indices:
86 * first index: 0=unaligned, 1=aligned
87 * second index: 0=out-of-place, 1=in-place
88 * third index: 0=backward, 1=forward
90 FFTWPREFIX(plan) plan[2][2][2];
91 /** Used to catch user mistakes */
93 /** Number of dimensions in the FFT */
98 gmx_fft_init_1d(gmx_fft_t * pfft,
102 return gmx_fft_init_many_1d(pfft, nx, 1, flags);
107 gmx_fft_init_many_1d(gmx_fft_t * pfft,
113 FFTWPREFIX(complex) *p1, *p2, *up1, *up2;
118 #ifdef GMX_DISABLE_FFTW_MEASURE
119 flags |= GMX_FFT_FLAG_CONSERVATIVE;
122 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
126 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
132 if ( (fft = (gmx_fft_t)FFTWPREFIX(malloc)(sizeof(struct gmx_fft))) == NULL)
138 /* allocate aligned, and extra memory to make it unaligned */
139 p1 = (FFTWPREFIX(complex) *) FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex))*(nx+2)*howmany);
142 FFTWPREFIX(free)(fft);
147 p2 = (FFTWPREFIX(complex) *) FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex))*(nx+2)*howmany);
150 FFTWPREFIX(free)(p1);
151 FFTWPREFIX(free)(fft);
156 /* make unaligned pointers.
157 * In double precision the actual complex datatype will be 16 bytes,
158 * so go to a char pointer and force an offset of 8 bytes instead.
162 up1 = (FFTWPREFIX(complex) *)pc;
166 up2 = (FFTWPREFIX(complex) *)pc;
168 /* int rank, const int *n, int howmany,
169 fftw_complex *in, const int *inembed,
170 int istride, int idist,
171 fftw_complex *out, const int *onembed,
172 int ostride, int odist,
173 int sign, unsigned flags */
174 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
175 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
176 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
177 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
178 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
179 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
180 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
181 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
183 for (i = 0; i < 2; i++)
185 for (j = 0; j < 2; j++)
187 for (k = 0; k < 2; k++)
189 if (fft->plan[i][j][k] == NULL)
191 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
193 gmx_fft_destroy(fft);
195 FFTWPREFIX(free)(p1);
196 FFTWPREFIX(free)(p2);
204 FFTWPREFIX(free)(p1);
205 FFTWPREFIX(free)(p2);
207 fft->real_transform = 0;
216 gmx_fft_init_1d_real(gmx_fft_t * pfft,
220 return gmx_fft_init_many_1d_real(pfft, nx, 1, flags);
224 gmx_fft_init_many_1d_real(gmx_fft_t * pfft,
230 real *p1, *p2, *up1, *up2;
235 #ifdef GMX_DISABLE_FFTW_MEASURE
236 flags |= GMX_FFT_FLAG_CONSERVATIVE;
239 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
243 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
249 if ( (fft = (gmx_fft_t) FFTWPREFIX(malloc)(sizeof(struct gmx_fft))) == NULL)
255 /* allocate aligned, and extra memory to make it unaligned */
256 p1 = (real *) FFTWPREFIX(malloc)(sizeof(real)*(nx/2+1)*2*howmany + 8);
259 FFTWPREFIX(free)(fft);
264 p2 = (real *) FFTWPREFIX(malloc)(sizeof(real)*(nx/2+1)*2*howmany + 8);
267 FFTWPREFIX(free)(p1);
268 FFTWPREFIX(free)(fft);
273 /* make unaligned pointers.
274 * In double precision the actual complex datatype will be 16 bytes,
275 * so go to a char pointer and force an offset of 8 bytes instead.
285 /* int rank, const int *n, int howmany,
286 double *in, const int *inembed,
287 int istride, int idist,
288 fftw_complex *out, const int *onembed,
289 int ostride, int odist,
291 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, up1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) up2, 0, 1, (nx/2+1), fftw_flags);
292 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, up1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) up1, 0, 1, (nx/2+1), fftw_flags);
293 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, p1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) p2, 0, 1, (nx/2+1), fftw_flags);
294 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, p1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) p1, 0, 1, (nx/2+1), fftw_flags);
296 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) up1, 0, 1, (nx/2+1), up2, 0, 1, (nx/2+1) *2, fftw_flags);
297 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) up1, 0, 1, (nx/2+1), up1, 0, 1, (nx/2+1) *2, fftw_flags);
298 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) p1, 0, 1, (nx/2+1), p2, 0, 1, (nx/2+1) *2, fftw_flags);
299 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) p1, 0, 1, (nx/2+1), p1, 0, 1, (nx/2+1) *2, fftw_flags);
301 for (i = 0; i < 2; i++)
303 for (j = 0; j < 2; j++)
305 for (k = 0; k < 2; k++)
307 if (fft->plan[i][j][k] == NULL)
309 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
311 gmx_fft_destroy(fft);
313 FFTWPREFIX(free)(p1);
314 FFTWPREFIX(free)(p2);
322 FFTWPREFIX(free)(p1);
323 FFTWPREFIX(free)(p2);
325 fft->real_transform = 1;
335 gmx_fft_init_2d_real(gmx_fft_t * pfft,
341 real *p1, *p2, *up1, *up2;
346 #ifdef GMX_DISABLE_FFTW_MEASURE
347 flags |= GMX_FFT_FLAG_CONSERVATIVE;
350 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
354 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
360 if ( (fft = (gmx_fft_t) FFTWPREFIX(malloc)(sizeof(struct gmx_fft))) == NULL)
366 /* allocate aligned, and extra memory to make it unaligned */
367 p1 = (real *) FFTWPREFIX(malloc)(sizeof(real) *( nx*(ny/2+1)*2 + 2) );
370 FFTWPREFIX(free)(fft);
375 p2 = (real *) FFTWPREFIX(malloc)(sizeof(real) *( nx*(ny/2+1)*2 + 2) );
378 FFTWPREFIX(free)(p1);
379 FFTWPREFIX(free)(fft);
384 /* make unaligned pointers.
385 * In double precision the actual complex datatype will be 16 bytes,
386 * so go to a char pointer and force an offset of 8 bytes instead.
397 fft->plan[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) up1, up2, fftw_flags);
398 fft->plan[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, up1, (FFTWPREFIX(complex) *) up2, fftw_flags);
399 fft->plan[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) up1, up1, fftw_flags);
400 fft->plan[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, up1, (FFTWPREFIX(complex) *) up1, fftw_flags);
402 fft->plan[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) p1, p2, fftw_flags);
403 fft->plan[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, p1, (FFTWPREFIX(complex) *) p2, fftw_flags);
404 fft->plan[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) p1, p1, fftw_flags);
405 fft->plan[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, p1, (FFTWPREFIX(complex) *) p1, fftw_flags);
408 for (i = 0; i < 2; i++)
410 for (j = 0; j < 2; j++)
412 for (k = 0; k < 2; k++)
414 if (fft->plan[i][j][k] == NULL)
416 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
418 gmx_fft_destroy(fft);
420 FFTWPREFIX(free)(p1);
421 FFTWPREFIX(free)(p2);
429 FFTWPREFIX(free)(p1);
430 FFTWPREFIX(free)(p2);
432 fft->real_transform = 1;
441 gmx_fft_1d (gmx_fft_t fft,
442 enum gmx_fft_direction dir,
446 int aligned = ((((size_t)in_data | (size_t)out_data) & 0xf) == 0);
447 int inplace = (in_data == out_data);
448 int isforward = (dir == GMX_FFT_FORWARD);
451 if ( (fft->real_transform == 1) || (fft->ndim != 1) ||
452 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
454 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
458 FFTWPREFIX(execute_dft)(fft->plan[aligned][inplace][isforward],
459 (FFTWPREFIX(complex) *) in_data,
460 (FFTWPREFIX(complex) *) out_data);
466 gmx_fft_many_1d (gmx_fft_t fft,
467 enum gmx_fft_direction dir,
471 return gmx_fft_1d(fft, dir, in_data, out_data);
475 gmx_fft_1d_real (gmx_fft_t fft,
476 enum gmx_fft_direction dir,
480 int aligned = ((((size_t)in_data | (size_t)out_data) & 0xf) == 0);
481 int inplace = (in_data == out_data);
482 int isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
485 if ( (fft->real_transform != 1) || (fft->ndim != 1) ||
486 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
488 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
494 FFTWPREFIX(execute_dft_r2c)(fft->plan[aligned][inplace][isforward],
495 (real *)in_data, (FFTWPREFIX(complex) *) out_data);
499 FFTWPREFIX(execute_dft_c2r)(fft->plan[aligned][inplace][isforward],
500 (FFTWPREFIX(complex) *) in_data, (real *)out_data);
507 gmx_fft_many_1d_real (gmx_fft_t fft,
508 enum gmx_fft_direction dir,
512 return gmx_fft_1d_real(fft, dir, in_data, out_data);
516 gmx_fft_2d_real (gmx_fft_t fft,
517 enum gmx_fft_direction dir,
521 int aligned = ((((size_t)in_data | (size_t)out_data) & 0xf) == 0);
522 int inplace = (in_data == out_data);
523 int isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
526 if ( (fft->real_transform != 1) || (fft->ndim != 2) ||
527 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
529 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
535 FFTWPREFIX(execute_dft_r2c)(fft->plan[aligned][inplace][isforward],
537 (FFTWPREFIX(complex) *) out_data);
541 FFTWPREFIX(execute_dft_c2r)(fft->plan[aligned][inplace][isforward],
542 (FFTWPREFIX(complex) *) in_data,
551 gmx_fft_destroy(gmx_fft_t fft)
557 for (i = 0; i < 2; i++)
559 for (j = 0; j < 2; j++)
561 for (k = 0; k < 2; k++)
563 if (fft->plan[i][j][k] != NULL)
566 FFTWPREFIX(destroy_plan)(fft->plan[i][j][k]);
568 fft->plan[i][j][k] = NULL;
574 FFTWPREFIX(free)(fft);
581 gmx_many_fft_destroy(gmx_fft_t fft)
583 gmx_fft_destroy(fft);
586 void gmx_fft_cleanup()
588 FFTWPREFIX(cleanup)();
591 const char *gmx_fft_get_version_info()
593 #ifdef GMX_NATIVE_WINDOWS
596 return FFTWPREFIX(version);