1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * Gromacs 4.0 Copyright (c) 1991-2003
5 * David van der Spoel, Erik Lindahl, University of Groningen.
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
12 * To help us fund GROMACS development, we humbly ask that you cite
13 * the research papers on the package. Check out http://www.gromacs.org
16 * Gnomes, ROck Monsters And Chili Sauce
26 #include <mkl_service.h>
28 #include "gromacs/fft/fft.h"
29 #include "gmx_fatal.h"
32 /* For MKL versions older than 10.0, we need to define MKL_LONG ourselves. */
34 #define MKL_LONG long int
39 #define GMX_DFTI_PREC DFTI_DOUBLE
41 #define GMX_DFTI_PREC DFTI_SINGLE
45 * Contents of the Intel MKL FFT fft datatype.
47 * Note that this is one of several possible implementations of gmx_fft_t.
49 * The MKL _API_ supports 1D, 2D, and 3D transforms, including real-to-complex.
50 * Unfortunately the actual library implementation does not support 3D real
51 * transforms as of version 7.2, and versions before 7.0 don't support 2D real
52 * either. In addition, the multi-dimensional storage format for real data
53 * is not compatible with our padding.
55 * To work around this we roll our own 2D and 3D real-to-complex transforms,
56 * using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
57 * (nx*ny) transforms at once when necessary. To perform strided multiple
58 * transforms out-of-place (i.e., without padding in the last dimension)
59 * on the fly we also need to separate the forward and backward
60 * handles for real-to-complex/complex-to-real data permutation.
62 * This makes it necessary to define 3 handles for in-place FFTs, and 4 for
63 * the out-of-place transforms. Still, whenever possible we try to use
64 * a single 3D-transform handle instead.
66 * So, the handles are enumerated as follows:
68 * 1D FFT (real too): Index 0 is the handle for the entire FFT
69 * 2D complex FFT: Index 0 is the handle for the entire FFT
70 * 3D complex FFT: Index 0 is the handle for the entire FFT
71 * 2D, inplace real FFT: 0=FFTx, 1=FFTy handle
72 * 2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy
73 * 3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
74 * 3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz
76 * Intel people reading this: Learn from FFTW what a good interface looks like :-)
84 int ndim; /**< Number of dimensions in FFT */
85 int nx; /**< Length of X transform */
86 int ny; /**< Length of Y transform */
87 int nz; /**< Length of Z transform */
88 int real_fft; /**< 1 if real FFT, otherwise 0 */
89 DFTI_DESCRIPTOR * inplace[3]; /**< In-place FFT handles; the init routines set unused slots to NULL */
90 DFTI_DESCRIPTOR * ooplace[4]; /**< Out-of-place FFT handles; the init routines set unused slots to NULL */
91 t_complex * work; /**< Buffer enabling out-of-place c2r FFT; the 2D real init allocates nx*(ny/2+1) elements */
97 gmx_fft_init_1d(gmx_fft_t * pfft,
107 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
112 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
117 /* Mark all handles invalid */
118 for (d = 0; d < 3; d++)
120 fft->inplace[d] = fft->ooplace[d] = NULL;
122 fft->ooplace[3] = NULL;
125 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
129 status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
134 status = DftiCommitDescriptor(fft->inplace[0]);
140 status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
145 DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
150 DftiCommitDescriptor(fft->ooplace[0]);
156 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
157 gmx_fft_destroy(fft);
173 gmx_fft_init_1d_real(gmx_fft_t * pfft, /* Builds in-place and out-of-place 1D real (DFTI_REAL) MKL descriptors of length nx. */
183 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
188 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
193 /* Mark all handles invalid so that cleanup can tell which ones were created */
194 for (d = 0; d < 3; d++)
196 fft->inplace[d] = fft->ooplace[d] = NULL;
198 fft->ooplace[3] = NULL; /* ooplace has one more slot than inplace */
200 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
204 status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
209 status = DftiCommitDescriptor(fft->inplace[0]);
215 status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
220 status = DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
225 status = DftiCommitDescriptor(fft->ooplace[0]);
229 if (status == DFTI_UNIMPLEMENTED) /* gets a dedicated message instead of the generic status report */
232 "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
233 gmx_fft_destroy(fft);
240 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
241 gmx_fft_destroy(fft);
257 gmx_fft_init_2d_real(gmx_fft_t * pfft, /* Builds the per-dimension MKL descriptors used for a 2D real FFT. */
270 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
275 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
282 /* Mark all handles invalid so that cleanup can tell which ones were created */
283 for (d = 0; d < 3; d++)
285 fft->inplace[d] = fft->ooplace[d] = NULL;
287 fft->ooplace[3] = NULL; /* ooplace has one more slot than inplace */
289 /* Roll our own 2D real transform using multiple transforms in MKL,
290 * since the current MKL versions do not support our storage format,
291 * and all but the most recent don't even have 2D real FFTs.
295 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
303 (DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE) ||
304 DftiSetValue(fft->inplace[0], DFTI_NUMBER_OF_TRANSFORMS, nyc) ||
305 DftiSetValue(fft->inplace[0], DFTI_INPUT_DISTANCE, 1) ||
306 DftiSetValue(fft->inplace[0], DFTI_INPUT_STRIDES, stride) ||
307 DftiSetValue(fft->inplace[0], DFTI_OUTPUT_DISTANCE, 1) ||
308 DftiSetValue(fft->inplace[0], DFTI_OUTPUT_STRIDES, stride));
313 status = DftiCommitDescriptor(fft->inplace[0]);
316 /* Out-of-place X FFT */
319 status = DftiCreateDescriptor(&(fft->ooplace[0]), GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
328 (DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
329 DftiSetValue(fft->ooplace[0], DFTI_NUMBER_OF_TRANSFORMS, nyc) || /* nyc is presumably ny/2+1 (cf. the work allocation below) - TODO confirm */
330 DftiSetValue(fft->ooplace[0], DFTI_INPUT_DISTANCE, 1) || /* successive X transforms start one element apart */
331 DftiSetValue(fft->ooplace[0], DFTI_INPUT_STRIDES, stride) ||
332 DftiSetValue(fft->ooplace[0], DFTI_OUTPUT_DISTANCE, 1) ||
333 DftiSetValue(fft->ooplace[0], DFTI_OUTPUT_STRIDES, stride));
338 status = DftiCommitDescriptor(fft->ooplace[0]);
345 status = DftiCreateDescriptor(&fft->inplace[1], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny); /* in-place real Y FFT of length ny */
354 (DftiSetValue(fft->inplace[1], DFTI_PLACEMENT, DFTI_INPLACE) ||
355 DftiSetValue(fft->inplace[1], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) || /* one Y transform per X index */
356 DftiSetValue(fft->inplace[1], DFTI_INPUT_DISTANCE, 2*nyc) || /* in place, input and output share the 2*nyc distance */
357 DftiSetValue(fft->inplace[1], DFTI_INPUT_STRIDES, stride) ||
358 DftiSetValue(fft->inplace[1], DFTI_OUTPUT_DISTANCE, 2*nyc) ||
359 DftiSetValue(fft->inplace[1], DFTI_OUTPUT_STRIDES, stride) ||
360 DftiCommitDescriptor(fft->inplace[1])); /* the commit is part of the same || chain */
364 /* Out-of-place real-to-complex (affects output distance) Y FFT */
367 status = DftiCreateDescriptor(&fft->ooplace[1], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
376 (DftiSetValue(fft->ooplace[1], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
377 DftiSetValue(fft->ooplace[1], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
378 DftiSetValue(fft->ooplace[1], DFTI_INPUT_DISTANCE, (MKL_LONG)ny) || /* input distance is ny ... */
379 DftiSetValue(fft->ooplace[1], DFTI_INPUT_STRIDES, stride) ||
380 DftiSetValue(fft->ooplace[1], DFTI_OUTPUT_DISTANCE, 2*nyc) || /* ... while output distance is 2*nyc */
381 DftiSetValue(fft->ooplace[1], DFTI_OUTPUT_STRIDES, stride) ||
382 DftiCommitDescriptor(fft->ooplace[1]));
386 /* Out-of-place complex-to-real (affects output distance) Y FFT */
389 status = DftiCreateDescriptor(&fft->ooplace[2], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
398 (DftiSetValue(fft->ooplace[2], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
399 DftiSetValue(fft->ooplace[2], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
400 DftiSetValue(fft->ooplace[2], DFTI_INPUT_DISTANCE, 2*nyc) || /* distances mirror the r2c handle: input 2*nyc ... */
401 DftiSetValue(fft->ooplace[2], DFTI_INPUT_STRIDES, stride) ||
402 DftiSetValue(fft->ooplace[2], DFTI_OUTPUT_DISTANCE, (MKL_LONG)ny) || /* ... output ny */
403 DftiSetValue(fft->ooplace[2], DFTI_OUTPUT_STRIDES, stride) ||
404 DftiCommitDescriptor(fft->ooplace[2]));
410 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*(ny/2+1)))) == NULL) /* work buffer: nx*(ny/2+1) complex elements */
418 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
419 gmx_fft_destroy(fft);
433 gmx_fft_1d(gmx_fft_t fft, /* Runs a 1D complex FFT forward or backward using a previously built plan. */
434 enum gmx_fft_direction dir,
438 int inplace = (in_data == out_data); /* identical pointers select the in-place descriptor */
441 if ( (fft->real_fft == 1) || (fft->ndim != 1) || /* plan must be complex and 1D */
442 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
444 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
448 if (dir == GMX_FFT_FORWARD)
452 status = DftiComputeForward(fft->inplace[0], in_data); /* in-place: one buffer */
456 status = DftiComputeForward(fft->ooplace[0], in_data, out_data); /* out-of-place: separate input and output */
463 status = DftiComputeBackward(fft->inplace[0], in_data);
467 status = DftiComputeBackward(fft->ooplace[0], in_data, out_data);
473 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
483 gmx_fft_1d_real(gmx_fft_t fft, /* Runs a 1D real-to-complex or complex-to-real FFT using a previously built plan. */
484 enum gmx_fft_direction dir,
488 int inplace = (in_data == out_data); /* identical pointers select the in-place descriptor */
491 if ( (fft->real_fft != 1) || (fft->ndim != 1) || /* plan must be real and 1D */
492 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
494 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
498 if (dir == GMX_FFT_REAL_TO_COMPLEX)
502 status = DftiComputeForward(fft->inplace[0], in_data); /* real-to-complex maps to the forward transform */
506 status = DftiComputeForward(fft->ooplace[0], in_data, out_data);
513 status = DftiComputeBackward(fft->inplace[0], in_data); /* complex-to-real maps to the backward transform */
517 status = DftiComputeBackward(fft->ooplace[0], in_data, out_data);
523 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
532 gmx_fft_2d_real(gmx_fft_t fft, /* Runs a 2D real-to-complex FFT as a Y pass followed by an X pass; complex-to-real is rejected. */
533 enum gmx_fft_direction dir,
537 int inplace = (in_data == out_data); /* identical pointers select the in-place path */
540 if ( (fft->real_fft != 1) || (fft->ndim != 2) || /* plan must be real and 2D */
541 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
543 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
547 if (dir == GMX_FFT_REAL_TO_COMPLEX)
551 /* real-to-complex in Y dimension, in-place */
552 status = DftiComputeForward(fft->inplace[1], in_data);
554 /* complex-to-complex in X dimension, in-place */
557 status = DftiComputeForward(fft->inplace[0], in_data);
562 /* real-to-complex in Y dimension, in_data to out_data */
563 status = DftiComputeForward(fft->ooplace[1], in_data, out_data);
565 /* complex-to-complex in X dimension, in place on out_data (uses the in-place X handle) */
568 status = DftiComputeForward(fft->inplace[0], out_data);
574 /* prior implementation was incorrect. See fft.cpp unit test */
575 gmx_incons("Complex -> Real is not supported by MKL.");
580 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
588 gmx_fft_destroy(gmx_fft_t fft) /* Releases the MKL descriptors held by an FFT plan. */
594 for (d = 0; d < 3; d++) /* slots 0..2 exist in both handle arrays */
596 if (fft->inplace[d] != NULL) /* NULL marks a handle that was never created */
598 DftiFreeDescriptor(&fft->inplace[d]);
600 if (fft->ooplace[d] != NULL)
602 DftiFreeDescriptor(&fft->ooplace[d]);
605 if (fft->ooplace[3] != NULL) /* ooplace has a fourth slot, handled outside the loop */
607 DftiFreeDescriptor(&fft->ooplace[3]);
609 if (fft->work != NULL) /* the work buffer is optional; only act on it when set */
617 void gmx_fft_cleanup()
622 const char *gmx_fft_get_version_info()