2 * This file is part of the GROMACS molecular simulation package.
4 * Gromacs 4.0 Copyright (c) 1991-2003
5 * David van der Spoel, Erik Lindahl, University of Groningen.
6 * Copyright (c) 2012, by the GROMACS development team, led by
7 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
8 * others, as listed in the AUTHORS file in the top-level source
9 * directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
50 #include "gmx_fatal.h"
53 /* For MKL version (<10.0), we should define MKL_LONG. */
55 #define MKL_LONG long int
60 #define GMX_DFTI_PREC DFTI_DOUBLE
62 #define GMX_DFTI_PREC DFTI_SINGLE
65 /* Contents of the Intel MKL FFT fft datatype.
67 * Note that this is one of several possible implementations of gmx_fft_t.
69 * The MKL _API_ supports 1D,2D, and 3D transforms, including real-to-complex.
70 * Unfortunately the actual library implementation does not support 3D real
71 * transforms as of version 7.2, and versions before 7.0 don't support 2D real
72 * either. In addition, the multi-dimensional storage format for real data
73 * is not compatible with our padding.
75 * To work around this we roll our own 2D and 3D real-to-complex transforms,
76 * using separate X/Y/Z handles defined to perform (ny*nz), (nx*nz), and
77 * (nx*ny) transforms at once when necessary. To perform strided multiple
78 * transforms out-of-place (i.e., without padding in the last dimension)
79 * on the fly we also need to separate the forward and backward
80 * handles for real-to-complex/complex-to-real data permutation.
82 * This makes it necessary to define 3 handles for in-place FFTs, and 4 for
83 * the out-of-place transforms. Still, whenever possible we try to use
84 * a single 3D-transform handle instead.
86 * So, the handles are enumerated as follows:
88 * 1D FFT (real too): Index 0 is the handle for the entire FFT
89 * 2D complex FFT: Index 0 is the handle for the entire FFT
90 * 3D complex FFT: Index 0 is the handle for the entire FFT
91 * 2D, inplace real FFT: 0=FFTx, 1=FFTy handle
92 * 2D, ooplace real FFT: 0=FFTx, 1=real-to-complex FFTy, 2=complex-to-real FFTy
93 * 3D, inplace real FFT: 0=FFTx, 1=FFTy, 2=FFTz handle
94 * 3D, ooplace real FFT: 0=FFTx, 1=FFTy, 2=r2c FFTz, 3=c2r FFTz
96 * Intel people reading this: Learn from FFTW what a good interface looks like :-)
101 int ndim; /**< Number of dimensions in FFT */
102 int nx; /**< Length of X transform */
103 int ny; /**< Length of Y transform */
104 int nz; /**< Length of Z transform */
105 int real_fft; /**< 1 if real FFT, otherwise 0 */
106 DFTI_DESCRIPTOR * inplace[3]; /**< in-place FFT */
107 DFTI_DESCRIPTOR * ooplace[4]; /**< out-of-place FFT */
108 t_complex * work; /**< Enable out-of-place c2r FFT */
114 gmx_fft_init_1d(gmx_fft_t * pfft,
124 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
129 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
134 /* Mark all handles invalid */
137 fft->inplace[d] = fft->ooplace[d] = NULL;
139 fft->ooplace[3] = NULL;
142 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
145 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
148 status = DftiCommitDescriptor(fft->inplace[0]);
152 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
155 DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
158 DftiCommitDescriptor(fft->ooplace[0]);
163 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
164 gmx_fft_destroy(fft);
180 gmx_fft_init_1d_real(gmx_fft_t * pfft,
190 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
195 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
200 /* Mark all handles invalid */
203 fft->inplace[d] = fft->ooplace[d] = NULL;
205 fft->ooplace[3] = NULL;
207 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nx);
210 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
213 status = DftiCommitDescriptor(fft->inplace[0]);
217 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nx);
220 status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
223 status = DftiCommitDescriptor(fft->ooplace[0]);
226 if(status == DFTI_UNIMPLEMENTED)
229 "The linked Intel MKL version (<6.0?) cannot do real FFTs.");
230 gmx_fft_destroy(fft);
237 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
238 gmx_fft_destroy(fft);
254 gmx_fft_init_2d(gmx_fft_t * pfft,
266 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
271 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
276 /* Mark all handles invalid */
279 fft->inplace[d] = fft->ooplace[d] = NULL;
281 fft->ooplace[3] = NULL;
286 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,2,length);
289 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
292 status = DftiCommitDescriptor(fft->inplace[0]);
296 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,2,length);
299 status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
302 status = DftiCommitDescriptor(fft->ooplace[0]);
307 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
308 gmx_fft_destroy(fft);
325 gmx_fft_init_2d_real(gmx_fft_t * pfft,
338 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
343 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
350 /* Mark all handles invalid */
353 fft->inplace[d] = fft->ooplace[d] = NULL;
355 fft->ooplace[3] = NULL;
357 /* Roll our own 2D real transform using multiple transforms in MKL,
358 * since the current MKL versions does not support our storage format,
359 * and all but the most recent don't even have 2D real FFTs.
363 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
371 (DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE) ||
372 DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc) ||
373 DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,1) ||
374 DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride) ||
375 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,1) ||
376 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride));
380 status = DftiCommitDescriptor(fft->inplace[0]);
382 /* Out-of-place X FFT */
384 status = DftiCreateDescriptor(&(fft->ooplace[0]),GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
392 (DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
393 DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,nyc) ||
394 DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,1) ||
395 DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride) ||
396 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,1) ||
397 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride));
401 status = DftiCommitDescriptor(fft->ooplace[0]);
406 status = DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
414 (DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE) ||
415 DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx) ||
416 DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,2*nyc) ||
417 DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride) ||
418 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,2*nyc) ||
419 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride) ||
420 DftiCommitDescriptor(fft->inplace[1]));
424 /* Out-of-place real-to-complex (affects output distance) Y FFT */
426 status = DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
434 (DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
435 DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx) ||
436 DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,(MKL_LONG)ny) ||
437 DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride) ||
438 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,2*nyc) ||
439 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride) ||
440 DftiCommitDescriptor(fft->ooplace[1]));
444 /* Out-of-place complex-to-real (affects output distance) Y FFT */
446 status = DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)ny);
454 (DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
455 DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx) ||
456 DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,2*nyc) ||
457 DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride) ||
458 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)ny) ||
459 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride) ||
460 DftiCommitDescriptor(fft->ooplace[2]));
466 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*(ny/2+1)))) == NULL)
474 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
475 gmx_fft_destroy(fft);
491 gmx_fft_init_3d(gmx_fft_t * pfft,
504 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
509 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
514 /* Mark all handles invalid */
517 fft->inplace[d] = fft->ooplace[d] = NULL;
519 fft->ooplace[3] = NULL;
525 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,(MKL_LONG)3,length);
528 status = DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE);
531 status = DftiCommitDescriptor(fft->inplace[0]);
535 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,(MKL_LONG)3,length);
538 status = DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE);
541 status = DftiCommitDescriptor(fft->ooplace[0]);
546 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
547 gmx_fft_destroy(fft);
567 gmx_fft_init_3d_real(gmx_fft_t * pfft,
581 gmx_fatal(FARGS,"Invalid opaque FFT datatype pointer.");
588 if( (fft = (gmx_fft_t)malloc(sizeof(struct gmx_fft))) == NULL)
593 /* Mark all handles invalid */
596 fft->inplace[d] = fft->ooplace[d] = NULL;
598 fft->ooplace[3] = NULL;
600 /* Roll our own 3D real transform using multiple transforms in MKL,
601 * since the current MKL versions does not support our storage format
602 * or 3D real transforms.
606 * ny*nzc complex-to-complex transforms, length nx
607 * transform distance: 1
608 * element strides: ny*nzc
610 status = DftiCreateDescriptor(&fft->inplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
618 (DftiSetValue(fft->inplace[0],DFTI_PLACEMENT,DFTI_INPLACE) ||
619 DftiSetValue(fft->inplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc) ||
620 DftiSetValue(fft->inplace[0],DFTI_INPUT_DISTANCE,1) ||
621 DftiSetValue(fft->inplace[0],DFTI_INPUT_STRIDES,stride) ||
622 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_DISTANCE,1) ||
623 DftiSetValue(fft->inplace[0],DFTI_OUTPUT_STRIDES,stride) ||
624 DftiCommitDescriptor(fft->inplace[0]));
627 /* Out-of-place X FFT:
628 * ny*nzc complex-to-complex transforms, length nx
629 * transform distance: 1
630 * element strides: ny*nzc
633 status = DftiCreateDescriptor(&fft->ooplace[0],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)nx);
641 (DftiSetValue(fft->ooplace[0],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
642 DftiSetValue(fft->ooplace[0],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)ny*nzc) ||
643 DftiSetValue(fft->ooplace[0],DFTI_INPUT_DISTANCE,1) ||
644 DftiSetValue(fft->ooplace[0],DFTI_INPUT_STRIDES,stride) ||
645 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_DISTANCE,1) ||
646 DftiSetValue(fft->ooplace[0],DFTI_OUTPUT_STRIDES,stride) ||
647 DftiCommitDescriptor(fft->ooplace[0]));
652 * We cannot do all NX*NZC transforms at once, so define a handle to do
653 * NZC transforms, and then execute it NX times.
654 * nzc complex-to-complex transforms, length ny
655 * transform distance: 1
656 * element strides: nzc
659 status = DftiCreateDescriptor(&fft->inplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny);
667 (DftiSetValue(fft->inplace[1],DFTI_PLACEMENT,DFTI_INPLACE) ||
668 DftiSetValue(fft->inplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc) ||
669 DftiSetValue(fft->inplace[1],DFTI_INPUT_DISTANCE,1) ||
670 DftiSetValue(fft->inplace[1],DFTI_INPUT_STRIDES,stride) ||
671 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_DISTANCE,1) ||
672 DftiSetValue(fft->inplace[1],DFTI_OUTPUT_STRIDES,stride) ||
673 DftiCommitDescriptor(fft->inplace[1]));
677 /* Out-of-place Y FFT:
678 * We cannot do all NX*NZC transforms at once, so define a handle to do
679 * NZC transforms, and then execute it NX times.
680 * nzc complex-to-complex transforms, length ny
681 * transform distance: 1
682 * element strides: nzc
685 status = DftiCreateDescriptor(&fft->ooplace[1],GMX_DFTI_PREC,DFTI_COMPLEX,1,(MKL_LONG)ny);
693 (DftiSetValue(fft->ooplace[1],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
694 DftiSetValue(fft->ooplace[1],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nzc) ||
695 DftiSetValue(fft->ooplace[1],DFTI_INPUT_DISTANCE,1) ||
696 DftiSetValue(fft->ooplace[1],DFTI_INPUT_STRIDES,stride) ||
697 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_DISTANCE,1) ||
698 DftiSetValue(fft->ooplace[1],DFTI_OUTPUT_STRIDES,stride) ||
699 DftiCommitDescriptor(fft->ooplace[1]));
703 * nx*ny real-to-complex transforms, length nz
704 * transform distance: nzc*2 -> nzc*2
708 status = DftiCreateDescriptor(&fft->inplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
716 (DftiSetValue(fft->inplace[2],DFTI_PLACEMENT,DFTI_INPLACE) ||
717 DftiSetValue(fft->inplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
718 DftiSetValue(fft->inplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2) ||
719 DftiSetValue(fft->inplace[2],DFTI_INPUT_STRIDES,stride) ||
720 DftiSetValue(fft->inplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2) ||
721 DftiSetValue(fft->inplace[2],DFTI_OUTPUT_STRIDES,stride) ||
722 DftiCommitDescriptor(fft->inplace[2]));
726 /* Out-of-place real-to-complex (affects distance) Z FFT:
727 * nx*ny real-to-complex transforms, length nz
728 * transform distance: nz -> nzc*2
732 status = DftiCreateDescriptor(&fft->ooplace[2],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
740 (DftiSetValue(fft->ooplace[2],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
741 DftiSetValue(fft->ooplace[2],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
742 DftiSetValue(fft->ooplace[2],DFTI_INPUT_DISTANCE,(MKL_LONG)nz) ||
743 DftiSetValue(fft->ooplace[2],DFTI_INPUT_STRIDES,stride) ||
744 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nzc*2) ||
745 DftiSetValue(fft->ooplace[2],DFTI_OUTPUT_STRIDES,stride) ||
746 DftiCommitDescriptor(fft->ooplace[2]));
750 /* Out-of-place complex-to-real (affects distance) Z FFT:
751 * nx*ny real-to-complex transforms, length nz
752 * transform distance: nzc*2 -> nz
756 status = DftiCreateDescriptor(&fft->ooplace[3],GMX_DFTI_PREC,DFTI_REAL,1,(MKL_LONG)nz);
764 (DftiSetValue(fft->ooplace[3],DFTI_PLACEMENT,DFTI_NOT_INPLACE) ||
765 DftiSetValue(fft->ooplace[3],DFTI_NUMBER_OF_TRANSFORMS,(MKL_LONG)nx*ny) ||
766 DftiSetValue(fft->ooplace[3],DFTI_INPUT_DISTANCE,(MKL_LONG)nzc*2) ||
767 DftiSetValue(fft->ooplace[3],DFTI_INPUT_STRIDES,stride) ||
768 DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_DISTANCE,(MKL_LONG)nz) ||
769 DftiSetValue(fft->ooplace[3],DFTI_OUTPUT_STRIDES,stride) ||
770 DftiCommitDescriptor(fft->ooplace[3]));
776 if ((fft->work = (t_complex *)malloc(sizeof(t_complex)*(nx*ny*(nz/2+1)))) == NULL)
785 gmx_fatal(FARGS,"Error initializing Intel MKL FFT; status=%d",status);
786 gmx_fft_destroy(fft);
805 gmx_fft_1d(gmx_fft_t fft,
806 enum gmx_fft_direction dir,
810 int inplace = (in_data == out_data);
813 if( (fft->real_fft == 1) || (fft->ndim != 1) ||
814 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
816 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
820 if(dir==GMX_FFT_FORWARD)
824 status = DftiComputeForward(fft->inplace[0],in_data);
828 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
835 status = DftiComputeBackward(fft->inplace[0],in_data);
839 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
845 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
855 gmx_fft_1d_real(gmx_fft_t fft,
856 enum gmx_fft_direction dir,
860 int inplace = (in_data == out_data);
863 if( (fft->real_fft != 1) || (fft->ndim != 1) ||
864 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
866 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
870 if(dir==GMX_FFT_REAL_TO_COMPLEX)
874 status = DftiComputeForward(fft->inplace[0],in_data);
878 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
885 status = DftiComputeBackward(fft->inplace[0],in_data);
889 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
895 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
904 gmx_fft_2d(gmx_fft_t fft,
905 enum gmx_fft_direction dir,
909 int inplace = (in_data == out_data);
912 if( (fft->real_fft == 1) || (fft->ndim != 2) ||
913 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
915 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
919 if(dir==GMX_FFT_FORWARD)
923 status = DftiComputeForward(fft->inplace[0],in_data);
927 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
934 status = DftiComputeBackward(fft->inplace[0],in_data);
938 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
944 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
953 gmx_fft_2d_real(gmx_fft_t fft,
954 enum gmx_fft_direction dir,
958 int inplace = (in_data == out_data);
961 if( (fft->real_fft != 1) || (fft->ndim != 2) ||
962 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
964 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
968 if(dir==GMX_FFT_REAL_TO_COMPLEX)
972 /* real-to-complex in Y dimension, in-place */
973 status = DftiComputeForward(fft->inplace[1],in_data);
975 /* complex-to-complex in X dimension, in-place */
977 status = DftiComputeForward(fft->inplace[0],in_data);
981 /* real-to-complex in Y dimension, in_data to out_data */
982 status = DftiComputeForward(fft->ooplace[1],in_data,out_data);
984 /* complex-to-complex in X dimension, in-place to out_data */
986 status = DftiComputeForward(fft->inplace[0],out_data);
993 /* complex-to-complex in X dimension, in-place */
994 status = DftiComputeBackward(fft->inplace[0],in_data);
996 /* complex-to-real in Y dimension, in-place */
998 status = DftiComputeBackward(fft->inplace[1],in_data);
1003 /* complex-to-complex in X dimension, from in_data to work */
1004 status = DftiComputeBackward(fft->ooplace[0],in_data,fft->work);
1006 /* complex-to-real in Y dimension, from work to out_data */
1008 status = DftiComputeBackward(fft->ooplace[1],fft->work,out_data);
1015 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1024 gmx_fft_3d(gmx_fft_t fft,
1025 enum gmx_fft_direction dir,
1029 int inplace = (in_data == out_data);
1032 if( (fft->real_fft == 1) || (fft->ndim != 3) ||
1033 ((dir != GMX_FFT_FORWARD) && (dir != GMX_FFT_BACKWARD)) )
1035 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
1039 if(dir==GMX_FFT_FORWARD)
1043 status = DftiComputeForward(fft->inplace[0],in_data);
1047 status = DftiComputeForward(fft->ooplace[0],in_data,out_data);
1054 status = DftiComputeBackward(fft->inplace[0],in_data);
1058 status = DftiComputeBackward(fft->ooplace[0],in_data,out_data);
1064 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1073 gmx_fft_3d_real(gmx_fft_t fft,
1074 enum gmx_fft_direction dir,
1078 int inplace = (in_data == out_data);
1085 nzc = fft->nz/2 + 1;
1087 if( (fft->real_fft != 1) || (fft->ndim != 3) ||
1088 ((dir != GMX_FFT_REAL_TO_COMPLEX) && (dir != GMX_FFT_COMPLEX_TO_REAL)) )
1090 gmx_fatal(FARGS,"FFT plan mismatch - bad plan or direction.");
1094 if(dir==GMX_FFT_REAL_TO_COMPLEX)
1098 /* real-to-complex in Z dimension, in-place */
1099 status = DftiComputeForward(fft->inplace[2],in_data);
1101 /* complex-to-complex in Y dimension, in-place */
1105 status = DftiComputeForward(fft->inplace[1],(t_complex *)in_data+i*ny*nzc);
1108 /* complex-to-complex in X dimension, in-place */
1110 status = DftiComputeForward(fft->inplace[0],in_data);
1114 /* real-to-complex in Z dimension, from in_data to out_data */
1115 status = DftiComputeForward(fft->ooplace[2],in_data,out_data);
1117 /* complex-to-complex in Y dimension, in-place */
1121 status = DftiComputeForward(fft->inplace[1],(t_complex *)out_data+i*ny*nzc);
1124 /* complex-to-complex in X dimension, in-place */
1126 status = DftiComputeForward(fft->inplace[0],out_data);
1133 /* complex-to-complex in X dimension, in-place */
1134 status = DftiComputeBackward(fft->inplace[0],in_data);
1136 /* complex-to-complex in Y dimension, in-place */
1140 status = DftiComputeBackward(fft->inplace[1],(t_complex *)in_data+i*ny*nzc);
1143 /* complex-to-real in Z dimension, in-place */
1145 status = DftiComputeBackward(fft->inplace[2],in_data);
1149 /* complex-to-complex in X dimension, from in_data to work */
1150 status = DftiComputeBackward(fft->ooplace[0],in_data,fft->work);
1152 /* complex-to-complex in Y dimension, in-place */
1156 status = DftiComputeBackward(fft->inplace[1],fft->work+i*ny*nzc);
1159 /* complex-to-real in Z dimension, work to out_data */
1161 status = DftiComputeBackward(fft->ooplace[2],fft->work,out_data);
1167 gmx_fatal(FARGS,"Error executing Intel MKL FFT.");
1177 gmx_fft_destroy(gmx_fft_t fft)
1185 if(fft->inplace[d] != NULL)
1187 DftiFreeDescriptor(&fft->inplace[d]);
1189 if(fft->ooplace[d] != NULL)
1191 DftiFreeDescriptor(&fft->ooplace[d]);
1194 if(fft->ooplace[3] != NULL)
1196 DftiFreeDescriptor(&fft->ooplace[3]);
1205 #endif /* GMX_FFT_MKL */