2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2003 David van der Spoel, Erik Lindahl, University of Groningen.
5 * Copyright (c) 2013,2014, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
44 #include <mkl_service.h>
46 #include "gromacs/fft/fft.h"
47 #include "gromacs/utility/fatalerror.h"
50 /* For MKL version (<10.0), we should define MKL_LONG. */
52 #define MKL_LONG long int
57 #define GMX_DFTI_PREC DFTI_DOUBLE
59 #define GMX_DFTI_PREC DFTI_SINGLE
64 * Contents of the gmx_fft_t datatype for the Intel MKL FFT backend.
66 * Note that this is one of several possible implementations of gmx_fft_t.
68 * The MKL _API_ supports 1D, 2D, and 3D transforms, including real-to-complex.
69 * Unfortunately the actual library implementation does not support 3D real
70 * transforms as of version 7.2, and versions before 7.0 don't support 2D real
71 * either. In addition, the multi-dimensional storage format for real data
72 * is not compatible with our padding.
74 * To work around this we roll our own 2D and 3D real-to-complex transforms,
75 * using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
76 * (nx*ny) transforms at once when necessary. To perform strided multiple
77 * transforms out-of-place (i.e., without padding in the last dimension)
78 * on the fly we also need to separate the forward and backward
79 * handles for real-to-complex/complex-to-real data permutation.
81 * This makes it necessary to define 3 handles for in-place FFTs, and 4 for
82 * the out-of-place transforms. Still, whenever possible we try to use
83 * a single 3D-transform handle instead.
85 * So, the handles are enumerated as follows:
87 * 1D FFT (real too): Index 0 is the handle for the entire FFT
88 * 2D complex FFT: Index 0 is the handle for the entire FFT
89 * 3D complex FFT: Index 0 is the handle for the entire FFT
90 * 2D, inplace real FFT: 0=FFTx, 1=FFTy handle
91 * 2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy
92 * 3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
93 * 3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz
95 * Intel people reading this: Learn from FFTW what a good interface looks like :-)
103 int ndim; /**< Number of dimensions in FFT */
104 int nx; /**< Length of X transform */
105 int ny; /**< Length of Y transform */
106 int nz; /**< Length of Z transform */
107 int real_fft; /**< 1 if real FFT, otherwise 0 */
108 DFTI_DESCRIPTOR * inplace[3]; /**< In-place FFT descriptors; the init routines set every slot to NULL before creating the ones they need */
109 DFTI_DESCRIPTOR * ooplace[4]; /**< Out-of-place FFT descriptors; the init routines set every slot to NULL before creating the ones they need */
110 t_complex * work; /**< Scratch buffer enabling out-of-place c2r FFT; the 2D real init allocates nx*(ny/2+1) elements for it */
/* Set up a 1D complex-to-complex Intel MKL FFT plan of length nx.
 *
 * The routine allocates a gmx_fft structure, marks every descriptor slot
 * as NULL, and then creates, configures and commits two descriptors:
 * inplace[0] for in-place transforms and ooplace[0] for out-of-place
 * transforms. The error path reports the DFTI status through gmx_fatal().
 *
 * Every DFTI call on the out-of-place descriptor now stores its return
 * code in status, exactly like the in-place sequence, so that a failed
 * DftiSetValue or DftiCommitDescriptor is no longer silently ignored.
 */
116 gmx_fft_init_1d(gmx_fft_t * pfft,
126 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
131 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
136 /* Mark all handles invalid */
137 for (d = 0; d < 3; d++)
139 fft->inplace[d] = fft->ooplace[d] = NULL;
/* ooplace has one more slot than inplace, so it is cleared separately */
141 fft->ooplace[3] = NULL;
/* In-place descriptor: create, select placement, commit */
144 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
148 status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
153 status = DftiCommitDescriptor(fft->inplace[0]);
/* Out-of-place descriptor: same sequence with DFTI_NOT_INPLACE */
159 status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
/* Keep the return codes so a failure here reaches the error report below */
164 status = DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
169 status = DftiCommitDescriptor(fft->ooplace[0]);
175 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
/* NOTE(review): presumably unreachable if gmx_fatal() does not return - confirm */
176 gmx_fft_destroy(fft);
/* Set up a 1D real Intel MKL FFT plan of length nx.
 *
 * Follows the same steps as the complex 1D setup but creates the
 * descriptors in the DFTI_REAL domain: the gmx_fft structure is allocated,
 * all descriptor slots are set to NULL, and inplace[0] and ooplace[0] are
 * created, given their placement mode and committed. Every DFTI return
 * code is stored in status.
 */
192 gmx_fft_init_1d_real(gmx_fft_t * pfft,
202 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
207 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
212 /* Mark all handles invalid */
213 for (d = 0; d < 3; d++)
215 fft->inplace[d] = fft->ooplace[d] = NULL;
/* ooplace has one more slot than inplace, so it is cleared separately */
217 fft->ooplace[3] = NULL;
/* In-place real descriptor: create, select placement, commit */
219 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
223 status = DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE);
228 status = DftiCommitDescriptor(fft->inplace[0]);
/* Out-of-place real descriptor: same sequence with DFTI_NOT_INPLACE */
234 status = DftiCreateDescriptor(&fft->ooplace[0], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)nx);
239 status = DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE);
244 status = DftiCommitDescriptor(fft->ooplace[0]);
/* A library without real-transform support gets its own, more specific message */
248 if (status == DFTI_UNIMPLEMENTED)
251 "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
252 gmx_fft_destroy(fft);
/* Generic failure report for any other status handled by this path */
259 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
/* NOTE(review): presumably unreachable if gmx_fatal() does not return - confirm */
260 gmx_fft_destroy(fft);
/* Set up a 2D real Intel MKL FFT plan built from batched 1D transforms.
 *
 * Descriptors created here:
 *   inplace[0] - complex X transform of length nx, in-place, nyc transforms
 *   ooplace[0] - complex X transform of length nx, out-of-place, nyc transforms
 *   inplace[1] - real Y transform of length ny, in-place, nx transforms,
 *                input and output distance 2*nyc
 *   ooplace[1] - real Y transform for real-to-complex, out-of-place,
 *                input distance ny and output distance 2*nyc
 *   ooplace[2] - real Y transform for complex-to-real, out-of-place,
 *                input distance 2*nyc and output distance ny
 * A work buffer of nx*(ny/2+1) t_complex elements is allocated as well.
 *
 * The definitions of nyc and stride are not visible in this excerpt;
 * nyc is presumably ny/2+1 - TODO confirm.
 *
 * NOTE(review): the DftiSetValue calls are chained with the logical OR
 * operator. If that expression is what gets assigned to status (the
 * assignment line is not visible here), status collapses to 0 or 1 and
 * the original DFTI error code is lost in the error message.
 */
276 gmx_fft_init_2d_real(gmx_fft_t * pfft,
289 gmx_fatal(FARGS, "Invalid opaque FFT datatype pointer.");
294 if ( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
301 /* Mark all handles invalid */
302 for (d = 0; d < 3; d++)
304 fft->inplace[d] = fft->ooplace[d] = NULL;
306 fft->ooplace[3] = NULL;
308 /* Roll our own 2D real transform using multiple transforms in MKL,
309 * since the current MKL versions does not support our storage format,
310 * and all but the most recent don't even have 2D real FFTs.
314 status = DftiCreateDescriptor(&fft->inplace[0], GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
322 (DftiSetValue(fft->inplace[0], DFTI_PLACEMENT, DFTI_INPLACE) ||
323 DftiSetValue(fft->inplace[0], DFTI_NUMBER_OF_TRANSFORMS, nyc) ||
324 DftiSetValue(fft->inplace[0], DFTI_INPUT_DISTANCE, 1) ||
325 DftiSetValue(fft->inplace[0], DFTI_INPUT_STRIDES, stride) ||
326 DftiSetValue(fft->inplace[0], DFTI_OUTPUT_DISTANCE, 1) ||
327 DftiSetValue(fft->inplace[0], DFTI_OUTPUT_STRIDES, stride));
332 status = DftiCommitDescriptor(fft->inplace[0]);
335 /* Out-of-place X FFT */
338 status = DftiCreateDescriptor(&(fft->ooplace[0]), GMX_DFTI_PREC, DFTI_COMPLEX, 1, (MKL_LONG)nx);
347 (DftiSetValue(fft->ooplace[0], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
348 DftiSetValue(fft->ooplace[0], DFTI_NUMBER_OF_TRANSFORMS, nyc) ||
349 DftiSetValue(fft->ooplace[0], DFTI_INPUT_DISTANCE, 1) ||
350 DftiSetValue(fft->ooplace[0], DFTI_INPUT_STRIDES, stride) ||
351 DftiSetValue(fft->ooplace[0], DFTI_OUTPUT_DISTANCE, 1) ||
352 DftiSetValue(fft->ooplace[0], DFTI_OUTPUT_STRIDES, stride));
357 status = DftiCommitDescriptor(fft->ooplace[0]);
/* In-place Y FFT: real transform of length ny repeated nx times; the
 * commit call is part of the same chained expression as the settings */
364 status = DftiCreateDescriptor(&fft->inplace[1], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
373 (DftiSetValue(fft->inplace[1], DFTI_PLACEMENT, DFTI_INPLACE) ||
374 DftiSetValue(fft->inplace[1], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
375 DftiSetValue(fft->inplace[1], DFTI_INPUT_DISTANCE, 2*nyc) ||
376 DftiSetValue(fft->inplace[1], DFTI_INPUT_STRIDES, stride) ||
377 DftiSetValue(fft->inplace[1], DFTI_OUTPUT_DISTANCE, 2*nyc) ||
378 DftiSetValue(fft->inplace[1], DFTI_OUTPUT_STRIDES, stride) ||
379 DftiCommitDescriptor(fft->inplace[1]));
383 /* Out-of-place real-to-complex (affects output distance) Y FFT */
386 status = DftiCreateDescriptor(&fft->ooplace[1], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
395 (DftiSetValue(fft->ooplace[1], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
396 DftiSetValue(fft->ooplace[1], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
397 DftiSetValue(fft->ooplace[1], DFTI_INPUT_DISTANCE, (MKL_LONG)ny) ||
398 DftiSetValue(fft->ooplace[1], DFTI_INPUT_STRIDES, stride) ||
399 DftiSetValue(fft->ooplace[1], DFTI_OUTPUT_DISTANCE, 2*nyc) ||
400 DftiSetValue(fft->ooplace[1], DFTI_OUTPUT_STRIDES, stride) ||
401 DftiCommitDescriptor(fft->ooplace[1]));
405 /* Out-of-place complex-to-real (affects output distance) Y FFT */
408 status = DftiCreateDescriptor(&fft->ooplace[2], GMX_DFTI_PREC, DFTI_REAL, 1, (MKL_LONG)ny);
417 (DftiSetValue(fft->ooplace[2], DFTI_PLACEMENT, DFTI_NOT_INPLACE) ||
418 DftiSetValue(fft->ooplace[2], DFTI_NUMBER_OF_TRANSFORMS, (MKL_LONG)nx) ||
419 DftiSetValue(fft->ooplace[2], DFTI_INPUT_DISTANCE, 2*nyc) ||
420 DftiSetValue(fft->ooplace[2], DFTI_INPUT_STRIDES, stride) ||
421 DftiSetValue(fft->ooplace[2], DFTI_OUTPUT_DISTANCE, (MKL_LONG)ny) ||
422 DftiSetValue(fft->ooplace[2], DFTI_OUTPUT_STRIDES, stride) ||
423 DftiCommitDescriptor(fft->ooplace[2]));
/* Scratch buffer of nx*(ny/2+1) complex elements, stored in fft->work.
 * NOTE(review): the element count is computed in int arithmetic before
 * the multiplication by sizeof - confirm grid sizes cannot overflow it. */
429 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*(ny/2+1)))) == NULL)
437 gmx_fatal(FARGS, "Error initializing Intel MKL FFT; status=%d", status);
/* NOTE(review): presumably unreachable if gmx_fatal() does not return - confirm */
438 gmx_fft_destroy(fft);
/* Execute a 1D complex-to-complex transform with a previously set up plan.
 *
 * The plan must be a 1D, non-real plan and dir must be GMX_FFT_FORWARD or
 * GMX_FFT_BACKWARD; anything else is reported through gmx_fatal(). When
 * in_data and out_data are the same pointer the in-place descriptor
 * inplace[0] is used, otherwise the out-of-place descriptor ooplace[0].
 */
452 gmx_fft_1d(gmx_fft_t fft,
453 enum gmx_fft_direction dir,
/* Pointer equality selects between the in-place and out-of-place handles */
457 int inplace = (in_data == out_data);
/* Reject real plans, plans of another dimensionality and unknown directions */
460 if ( (fft->real_fft == 1) || (fft->ndim != 1) ||
461 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
463 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
467 if (dir == GMX_FFT_FORWARD)
/* Forward transform: in-place variant, then out-of-place variant */
471 status = DftiComputeForward(fft->inplace[0], in_data);
475 status = DftiComputeForward(fft->ooplace[0], in_data, out_data);
/* Backward transform: in-place variant, then out-of-place variant */
482 status = DftiComputeBackward(fft->inplace[0], in_data);
486 status = DftiComputeBackward(fft->ooplace[0], in_data, out_data);
/* Failure report for the compute calls above; the condition is not visible here */
492 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
/* Execute a 1D real transform with a previously set up plan.
 *
 * The plan must be a 1D real plan and dir must be GMX_FFT_REAL_TO_COMPLEX
 * or GMX_FFT_COMPLEX_TO_REAL; anything else is reported through
 * gmx_fatal(). Real-to-complex maps to DftiComputeForward and the other
 * direction to DftiComputeBackward. When in_data and out_data are the
 * same pointer inplace[0] is used, otherwise ooplace[0].
 */
502 gmx_fft_1d_real(gmx_fft_t fft,
503 enum gmx_fft_direction dir,
/* Pointer equality selects between the in-place and out-of-place handles */
507 int inplace = (in_data == out_data);
/* Reject complex plans, plans of another dimensionality and unknown directions */
510 if ( (fft->real_fft != 1) || (fft->ndim != 1) ||
511 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
513 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
517 if (dir == GMX_FFT_REAL_TO_COMPLEX)
/* Real-to-complex: in-place variant, then out-of-place variant */
521 status = DftiComputeForward(fft->inplace[0], in_data);
525 status = DftiComputeForward(fft->ooplace[0], in_data, out_data);
/* Complex-to-real: in-place variant, then out-of-place variant */
532 status = DftiComputeBackward(fft->inplace[0], in_data);
536 status = DftiComputeBackward(fft->ooplace[0], in_data, out_data);
/* Failure report for the compute calls above; the condition is not visible here */
542 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
/* Execute a 2D real transform with a previously set up plan.
 *
 * The plan must be a 2D real plan and dir must be GMX_FFT_REAL_TO_COMPLEX
 * or GMX_FFT_COMPLEX_TO_REAL; anything else is reported through
 * gmx_fatal(). Real-to-complex is carried out as a batched Y transform
 * followed by a batched X transform. Complex-to-real is not carried out
 * at all: that branch calls gmx_incons().
 */
551 gmx_fft_2d_real(gmx_fft_t fft,
552 enum gmx_fft_direction dir,
/* Pointer equality selects between the in-place and out-of-place paths */
556 int inplace = (in_data == out_data);
/* Reject complex plans, plans of another dimensionality and unknown directions */
559 if ( (fft->real_fft != 1) || (fft->ndim != 2) ||
560 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
562 gmx_fatal(FARGS, "FFT plan mismatch - bad plan or direction.");
566 if (dir == GMX_FFT_REAL_TO_COMPLEX)
570 /* real-to-complex in Y dimension, in-place */
571 status = DftiComputeForward(fft->inplace[1], in_data);
573 /* complex-to-complex in X dimension, in-place */
576 status = DftiComputeForward(fft->inplace[0], in_data);
581 /* real-to-complex in Y dimension, in_data to out_data */
582 status = DftiComputeForward(fft->ooplace[1], in_data, out_data);
584 /* complex-to-complex in X dimension, in-place to out_data */
/* The Y pass already wrote to out_data, so the X pass reuses the in-place handle on it */
587 status = DftiComputeForward(fft->inplace[0], out_data);
593 /* prior implementation was incorrect. See fft.cpp unit test */
594 gmx_incons("Complex -> Real is not supported by MKL.");
/* Failure report for the compute calls above; the condition is not visible here */
599 gmx_fatal(FARGS, "Error executing Intel MKL FFT.");
/* Release the resources held by an FFT plan.
 *
 * Every non-NULL descriptor in inplace[0..2] and ooplace[0..3] is passed
 * to DftiFreeDescriptor(), after which the work buffer is checked.
 * What happens to the work buffer and to the structure itself is not
 * visible in this excerpt.
 *
 * NOTE(review): the only visible write to fft->work is the allocation in
 * the 2D real setup - confirm that the other init routines set it to NULL
 * before any path can reach this function.
 */
607 gmx_fft_destroy(gmx_fft_t fft)
/* The first three slots exist in both descriptor arrays */
613 for (d = 0; d < 3; d++)
615 if (fft->inplace[d] != NULL)
617 DftiFreeDescriptor(&fft->inplace[d]);
619 if (fft->ooplace[d] != NULL)
621 DftiFreeDescriptor(&fft->ooplace[d]);
/* ooplace has a fourth slot that the loop above does not cover */
624 if (fft->ooplace[3] != NULL)
626 DftiFreeDescriptor(&fft->ooplace[3]);
628 if (fft->work != NULL)
636 void gmx_fft_cleanup()
641 const char *gmx_fft_get_version_info()