3 * Gromacs 4.0 Copyright (c) 1991-2003
4 * David van der Spoel, Erik Lindahl, University of Groningen.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * To help us fund GROMACS development, we humbly ask that you cite
12 * the research papers on the package. Check out http://www.gromacs.org
15 * Gnomes, ROck Monsters And Chili Sauce
26 #include "gromacs/fft/fft.h"
27 #include "gmx_fatal.h"
30 #define FFTWPREFIX(name) fftw_ ## name
32 #define FFTWPREFIX(name) fftwf_ ## name
35 #include "thread_mpi/mutex.h"
36 #include "gromacs/utility/exceptions.h"
38 /* none of the fftw3 calls, except execute(), are thread-safe, so
39 we need to serialize them with this mutex. */
40 static tMPI::mutex big_fftw_mutex;
41 #define FFTW_LOCK try { big_fftw_mutex.lock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
42 #define FFTW_UNLOCK try { big_fftw_mutex.unlock(); } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
44 /* We assume here that aligned memory starts at multiple of 16 bytes and unaligned memory starts at multiple of 8 bytes. The later is guranteed for all malloc implementation.
46 - It is not allowed to use these FFT plans from memory which doesn't have a starting address as a multiple of 8 bytes.
47 This is OK as long as the memory directly comes from malloc and is not some subarray within alloated memory.
48 - This has to be fixed if any future architecute requires memory to be aligned to multiples of 32 bytes.
52 * Contents of the FFTW3 fft datatype.
54 * Note that this is one of several possible implementations of gmx_fft_t.
65 * Three alternatives (unaligned/aligned, out-of-place/in-place, forward/backward)
66 * results in 8 different FFTW plans. Keep track of them with 3 array indices:
67 * first index: 0=unaligned, 1=aligned
68 * second index: 0=out-of-place, 1=in-place
69 * third index: 0=backward, 1=forward
71 FFTWPREFIX(plan) plan[2][2][2];
72 /** Used to catch user mistakes */
74 /** Number of dimensions in the FFT */
79 gmx_fft_init_1d(gmx_fft_t * pfft,
83 return gmx_fft_init_many_1d(pfft, nx, 1, flags);
88 gmx_fft_init_many_1d(gmx_fft_t * pfft,
94 FFTWPREFIX(complex) *p1, *p2, *up1, *up2;
99 #ifdef GMX_DISABLE_FFTW_MEASURE
100 flags |= GMX_FFT_FLAG_CONSERVATIVE;
103 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
107 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
113 if ( (fft = (gmx_fft_t)FFTWPREFIX(malloc)(sizeof(struct gmx_fft))) == NULL)
119 /* allocate aligned, and extra memory to make it unaligned */
120 p1 = (FFTWPREFIX(complex) *) FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex))*(nx+2)*howmany);
123 FFTWPREFIX(free)(fft);
128 p2 = (FFTWPREFIX(complex) *) FFTWPREFIX(malloc)(sizeof(FFTWPREFIX(complex))*(nx+2)*howmany);
131 FFTWPREFIX(free)(p1);
132 FFTWPREFIX(free)(fft);
137 /* make unaligned pointers.
138 * In double precision the actual complex datatype will be 16 bytes,
139 * so go to a char pointer and force an offset of 8 bytes instead.
143 up1 = (FFTWPREFIX(complex) *)pc;
147 up2 = (FFTWPREFIX(complex) *)pc;
149 /* int rank, const int *n, int howmany,
150 fftw_complex *in, const int *inembed,
151 int istride, int idist,
152 fftw_complex *out, const int *onembed,
153 int ostride, int odist,
154 int sign, unsigned flags */
155 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
156 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
157 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
158 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, up1, &nx, 1, nx, up1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
159 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
160 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p2, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
161 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_BACKWARD, fftw_flags);
162 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft)(1, &nx, howmany, p1, &nx, 1, nx, p1, &nx, 1, nx, FFTW_FORWARD, fftw_flags);
164 for (i = 0; i < 2; i++)
166 for (j = 0; j < 2; j++)
168 for (k = 0; k < 2; k++)
170 if (fft->plan[i][j][k] == NULL)
172 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
174 gmx_fft_destroy(fft);
176 FFTWPREFIX(free)(p1);
177 FFTWPREFIX(free)(p2);
185 FFTWPREFIX(free)(p1);
186 FFTWPREFIX(free)(p2);
188 fft->real_transform = 0;
197 gmx_fft_init_1d_real(gmx_fft_t * pfft,
201 return gmx_fft_init_many_1d_real(pfft, nx, 1, flags);
205 gmx_fft_init_many_1d_real(gmx_fft_t * pfft,
211 real *p1, *p2, *up1, *up2;
216 #ifdef GMX_DISABLE_FFTW_MEASURE
217 flags |= GMX_FFT_FLAG_CONSERVATIVE;
220 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
224 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
230 if ( (fft = (gmx_fft_t) FFTWPREFIX(malloc)(sizeof(struct gmx_fft))) == NULL)
236 /* allocate aligned, and extra memory to make it unaligned */
237 p1 = (real *) FFTWPREFIX(malloc)(sizeof(real)*(nx/2+1)*2*howmany + 8);
240 FFTWPREFIX(free)(fft);
245 p2 = (real *) FFTWPREFIX(malloc)(sizeof(real)*(nx/2+1)*2*howmany + 8);
248 FFTWPREFIX(free)(p1);
249 FFTWPREFIX(free)(fft);
254 /* make unaligned pointers.
255 * In double precision the actual complex datatype will be 16 bytes,
256 * so go to a char pointer and force an offset of 8 bytes instead.
266 /* int rank, const int *n, int howmany,
267 double *in, const int *inembed,
268 int istride, int idist,
269 fftw_complex *out, const int *onembed,
270 int ostride, int odist,
272 fft->plan[0][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, up1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) up2, 0, 1, (nx/2+1), fftw_flags);
273 fft->plan[0][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, up1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) up1, 0, 1, (nx/2+1), fftw_flags);
274 fft->plan[1][0][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, p1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) p2, 0, 1, (nx/2+1), fftw_flags);
275 fft->plan[1][1][1] = FFTWPREFIX(plan_many_dft_r2c)(1, &nx, howmany, p1, 0, 1, (nx/2+1) *2, (FFTWPREFIX(complex) *) p1, 0, 1, (nx/2+1), fftw_flags);
277 fft->plan[0][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) up1, 0, 1, (nx/2+1), up2, 0, 1, (nx/2+1) *2, fftw_flags);
278 fft->plan[0][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) up1, 0, 1, (nx/2+1), up1, 0, 1, (nx/2+1) *2, fftw_flags);
279 fft->plan[1][0][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) p1, 0, 1, (nx/2+1), p2, 0, 1, (nx/2+1) *2, fftw_flags);
280 fft->plan[1][1][0] = FFTWPREFIX(plan_many_dft_c2r)(1, &nx, howmany, (FFTWPREFIX(complex) *) p1, 0, 1, (nx/2+1), p1, 0, 1, (nx/2+1) *2, fftw_flags);
282 for (i = 0; i < 2; i++)
284 for (j = 0; j < 2; j++)
286 for (k = 0; k < 2; k++)
288 if (fft->plan[i][j][k] == NULL)
290 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
292 gmx_fft_destroy(fft);
294 FFTWPREFIX(free)(p1);
295 FFTWPREFIX(free)(p2);
303 FFTWPREFIX(free)(p1);
304 FFTWPREFIX(free)(p2);
306 fft->real_transform = 1;
316 gmx_fft_init_2d_real(gmx_fft_t * pfft,
322 real *p1, *p2, *up1, *up2;
327 #ifdef GMX_DISABLE_FFTW_MEASURE
328 flags |= GMX_FFT_FLAG_CONSERVATIVE;
331 fftw_flags = (flags & GMX_FFT_FLAG_CONSERVATIVE) ? FFTW_ESTIMATE : FFTW_MEASURE;
335 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
341 if ( (fft = (gmx_fft_t) FFTWPREFIX(malloc)(sizeof(struct gmx_fft))) == NULL)
347 /* allocate aligned, and extra memory to make it unaligned */
348 p1 = (real *) FFTWPREFIX(malloc)(sizeof(real) *( nx*(ny/2+1)*2 + 2) );
351 FFTWPREFIX(free)(fft);
356 p2 = (real *) FFTWPREFIX(malloc)(sizeof(real) *( nx*(ny/2+1)*2 + 2) );
359 FFTWPREFIX(free)(p1);
360 FFTWPREFIX(free)(fft);
365 /* make unaligned pointers.
366 * In double precision the actual complex datatype will be 16 bytes,
367 * so go to a char pointer and force an offset of 8 bytes instead.
378 fft->plan[0][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) up1, up2, fftw_flags);
379 fft->plan[0][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, up1, (FFTWPREFIX(complex) *) up2, fftw_flags);
380 fft->plan[0][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) up1, up1, fftw_flags);
381 fft->plan[0][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, up1, (FFTWPREFIX(complex) *) up1, fftw_flags);
383 fft->plan[1][0][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) p1, p2, fftw_flags);
384 fft->plan[1][0][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, p1, (FFTWPREFIX(complex) *) p2, fftw_flags);
385 fft->plan[1][1][0] = FFTWPREFIX(plan_dft_c2r_2d)(nx, ny, (FFTWPREFIX(complex) *) p1, p1, fftw_flags);
386 fft->plan[1][1][1] = FFTWPREFIX(plan_dft_r2c_2d)(nx, ny, p1, (FFTWPREFIX(complex) *) p1, fftw_flags);
389 for (i = 0; i < 2; i++)
391 for (j = 0; j < 2; j++)
393 for (k = 0; k < 2; k++)
395 if (fft->plan[i][j][k] == NULL)
397 gmx_fatal(FARGS, "Error initializing FFTW3 plan.");
399 gmx_fft_destroy(fft);
401 FFTWPREFIX(free)(p1);
402 FFTWPREFIX(free)(p2);
410 FFTWPREFIX(free)(p1);
411 FFTWPREFIX(free)(p2);
413 fft->real_transform = 1;
422 gmx_fft_1d (gmx_fft_t fft,
423 enum gmx_fft_direction dir,
427 int aligned = ((((size_t)in_data | (size_t)out_data) & 0xf) == 0);
428 int inplace = (in_data == out_data);
429 int isforward = (dir == GMX_FFT_FORWARD);
432 if ( (fft->real_transform == 1) || (fft->ndim != 1) ||
433 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
435 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
439 FFTWPREFIX(execute_dft)(fft->plan[aligned][inplace][isforward],
440 (FFTWPREFIX(complex) *) in_data,
441 (FFTWPREFIX(complex) *) out_data);
447 gmx_fft_many_1d (gmx_fft_t fft,
448 enum gmx_fft_direction dir,
452 return gmx_fft_1d(fft, dir, in_data, out_data);
456 gmx_fft_1d_real (gmx_fft_t fft,
457 enum gmx_fft_direction dir,
461 int aligned = ((((size_t)in_data | (size_t)out_data) & 0xf) == 0);
462 int inplace = (in_data == out_data);
463 int isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
466 if ( (fft->real_transform != 1) || (fft->ndim != 1) ||
467 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
469 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
475 FFTWPREFIX(execute_dft_r2c)(fft->plan[aligned][inplace][isforward],
476 (real *)in_data, (FFTWPREFIX(complex) *) out_data);
480 FFTWPREFIX(execute_dft_c2r)(fft->plan[aligned][inplace][isforward],
481 (FFTWPREFIX(complex) *) in_data, (real *)out_data);
488 gmx_fft_many_1d_real (gmx_fft_t fft,
489 enum gmx_fft_direction dir,
493 return gmx_fft_1d_real(fft, dir, in_data, out_data);
497 gmx_fft_2d_real (gmx_fft_t fft,
498 enum gmx_fft_direction dir,
502 int aligned = ((((size_t)in_data | (size_t)out_data) & 0xf) == 0);
503 int inplace = (in_data == out_data);
504 int isforward = (dir == GMX_FFT_REAL_TO_COMPLEX);
507 if ( (fft->real_transform != 1) || (fft->ndim != 2) ||
508 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
510 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
516 FFTWPREFIX(execute_dft_r2c)(fft->plan[aligned][inplace][isforward],
518 (FFTWPREFIX(complex) *) out_data);
522 FFTWPREFIX(execute_dft_c2r)(fft->plan[aligned][inplace][isforward],
523 (FFTWPREFIX(complex) *) in_data,
532 gmx_fft_destroy(gmx_fft_t fft)
538 for (i = 0; i < 2; i++)
540 for (j = 0; j < 2; j++)
542 for (k = 0; k < 2; k++)
544 if (fft->plan[i][j][k] != NULL)
547 FFTWPREFIX(destroy_plan)(fft->plan[i][j][k]);
549 fft->plan[i][j][k] = NULL;
555 FFTWPREFIX(free)(fft);
562 gmx_many_fft_destroy(gmx_fft_t fft)
564 gmx_fft_destroy(fft);
567 void gmx_fft_cleanup()
569 FFTWPREFIX(cleanup)();
572 const char *gmx_fft_get_version_info()
574 #ifdef GMX_NATIVE_WINDOWS
577 return FFTWPREFIX(version);