2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2012, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 ************************************************************
42 Copy of fftpack from Numpy with very minor modifications:
43 - usage of fftpack.h (replacement for Treal define)
44 - [cr]fft[ifb]1 non-static
45 - Added Copyright headers
46 - Added fftpack_ prefix
48 Original version is from Numpy 1.6
50 ************************************************************
52 Copyright (c) 2005-2011, NumPy Developers.
55 Redistribution and use in source and binary forms, with or without
56 modification, are permitted provided that the following conditions are
59 * Redistributions of source code must retain the above copyright
60 notice, this list of conditions and the following disclaimer.
62 * Redistributions in binary form must reproduce the above
63 copyright notice, this list of conditions and the following
64 disclaimer in the documentation and/or other materials provided
65 with the distribution.
67 * Neither the name of the NumPy Developers nor the names of any
68 contributors may be used to endorse or promote products derived
69 from this software without specific prior written permission.
71 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
72 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
73 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
74 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
75 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
76 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
77 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
78 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
79 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
80 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
81 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
83 fftpack.c : A set of FFT routines in C.
84 Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber (Version 4, 1985).
88 /* isign is +1 for backward and -1 for forward transforms */
97 #define MAXFAC 13 /* maximum number of factors in factorization of n */
98 #define NSPECIAL 4 /* number of factors for which we have special-case routines */
105 /* ----------------------------------------------------------------------
106 passf2, passf3, passf4, passf5, passf. Complex FFT passes fwd and bwd.
107 ---------------------------------------------------------------------- */
109 static void passf2(int ido, int l1, const Treal cc[], Treal ch[], const Treal wa1[], int isign)
110 /* isign==+1 for backward transform */
115 for (k=0; k<l1; k++) {
118 ch[ah] = ref(cc,ac) + ref(cc,ac + ido);
119 ch[ah + ido*l1] = ref(cc,ac) - ref(cc,ac + ido);
120 ch[ah+1] = ref(cc,ac+1) + ref(cc,ac + ido + 1);
121 ch[ah + ido*l1 + 1] = ref(cc,ac+1) - ref(cc,ac + ido + 1);
124 for (k=0; k<l1; k++) {
125 for (i=0; i<ido-1; i+=2) {
128 ch[ah] = ref(cc,ac) + ref(cc,ac + ido);
129 tr2 = ref(cc,ac) - ref(cc,ac + ido);
130 ch[ah+1] = ref(cc,ac+1) + ref(cc,ac + 1 + ido);
131 ti2 = ref(cc,ac+1) - ref(cc,ac + 1 + ido);
132 ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2;
133 ch[ah+l1*ido] = wa1[i]*tr2 - isign*wa1[i+1]*ti2;
140 static void passf3(int ido, int l1, const Treal cc[], Treal ch[],
141 const Treal wa1[], const Treal wa2[], int isign)
142 /* isign==+1 for backward transform */
144 static const Treal taur = -0.5;
145 static const Treal taui = 0.866025403784439;
147 Treal ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
149 for (k=1; k<=l1; k++) {
151 tr2 = ref(cc,ac) + ref(cc,ac + ido);
152 cr2 = ref(cc,ac - ido) + taur*tr2;
154 ch[ah] = ref(cc,ac - ido) + tr2;
156 ti2 = ref(cc,ac + 1) + ref(cc,ac + ido + 1);
157 ci2 = ref(cc,ac - ido + 1) + taur*ti2;
158 ch[ah + 1] = ref(cc,ac - ido + 1) + ti2;
160 cr3 = isign*taui*(ref(cc,ac) - ref(cc,ac + ido));
161 ci3 = isign*taui*(ref(cc,ac + 1) - ref(cc,ac + ido + 1));
162 ch[ah + l1*ido] = cr2 - ci3;
163 ch[ah + 2*l1*ido] = cr2 + ci3;
164 ch[ah + l1*ido + 1] = ci2 + cr3;
165 ch[ah + 2*l1*ido + 1] = ci2 - cr3;
168 for (k=1; k<=l1; k++) {
169 for (i=0; i<ido-1; i+=2) {
170 ac = i + (3*k - 2)*ido;
171 tr2 = ref(cc,ac) + ref(cc,ac + ido);
172 cr2 = ref(cc,ac - ido) + taur*tr2;
174 ch[ah] = ref(cc,ac - ido) + tr2;
175 ti2 = ref(cc,ac + 1) + ref(cc,ac + ido + 1);
176 ci2 = ref(cc,ac - ido + 1) + taur*ti2;
177 ch[ah + 1] = ref(cc,ac - ido + 1) + ti2;
178 cr3 = isign*taui*(ref(cc,ac) - ref(cc,ac + ido));
179 ci3 = isign*taui*(ref(cc,ac + 1) - ref(cc,ac + ido + 1));
184 ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
185 ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
186 ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
187 ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
194 static void passf4(int ido, int l1, const Treal cc[], Treal ch[],
195 const Treal wa1[], const Treal wa2[], const Treal wa3[], int isign)
196 /* isign == -1 for forward transform and +1 for backward transform */
199 Treal ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
201 for (k=0; k<l1; k++) {
203 ti1 = ref(cc,ac) - ref(cc,ac + 2*ido);
204 ti2 = ref(cc,ac) + ref(cc,ac + 2*ido);
205 tr4 = ref(cc,ac + 3*ido) - ref(cc,ac + ido);
206 ti3 = ref(cc,ac + ido) + ref(cc,ac + 3*ido);
207 tr1 = ref(cc,ac - 1) - ref(cc,ac + 2*ido - 1);
208 tr2 = ref(cc,ac - 1) + ref(cc,ac + 2*ido - 1);
209 ti4 = ref(cc,ac + ido - 1) - ref(cc,ac + 3*ido - 1);
210 tr3 = ref(cc,ac + ido - 1) + ref(cc,ac + 3*ido - 1);
213 ch[ah + 2*l1*ido] = tr2 - tr3;
214 ch[ah + 1] = ti2 + ti3;
215 ch[ah + 2*l1*ido + 1] = ti2 - ti3;
216 ch[ah + l1*ido] = tr1 + isign*tr4;
217 ch[ah + 3*l1*ido] = tr1 - isign*tr4;
218 ch[ah + l1*ido + 1] = ti1 + isign*ti4;
219 ch[ah + 3*l1*ido + 1] = ti1 - isign*ti4;
222 for (k=0; k<l1; k++) {
223 for (i=0; i<ido-1; i+=2) {
224 ac = i + 1 + 4*k*ido;
225 ti1 = ref(cc,ac) - ref(cc,ac + 2*ido);
226 ti2 = ref(cc,ac) + ref(cc,ac + 2*ido);
227 ti3 = ref(cc,ac + ido) + ref(cc,ac + 3*ido);
228 tr4 = ref(cc,ac + 3*ido) - ref(cc,ac + ido);
229 tr1 = ref(cc,ac - 1) - ref(cc,ac + 2*ido - 1);
230 tr2 = ref(cc,ac - 1) + ref(cc,ac + 2*ido - 1);
231 ti4 = ref(cc,ac + ido - 1) - ref(cc,ac + 3*ido - 1);
232 tr3 = ref(cc,ac + ido - 1) + ref(cc,ac + 3*ido - 1);
236 ch[ah + 1] = ti2 + ti3;
238 cr2 = tr1 + isign*tr4;
239 cr4 = tr1 - isign*tr4;
240 ci2 = ti1 + isign*ti4;
241 ci4 = ti1 - isign*ti4;
242 ch[ah + l1*ido] = wa1[i]*cr2 - isign*wa1[i + 1]*ci2;
243 ch[ah + l1*ido + 1] = wa1[i]*ci2 + isign*wa1[i + 1]*cr2;
244 ch[ah + 2*l1*ido] = wa2[i]*cr3 - isign*wa2[i + 1]*ci3;
245 ch[ah + 2*l1*ido + 1] = wa2[i]*ci3 + isign*wa2[i + 1]*cr3;
246 ch[ah + 3*l1*ido] = wa3[i]*cr4 -isign*wa3[i + 1]*ci4;
247 ch[ah + 3*l1*ido + 1] = wa3[i]*ci4 + isign*wa3[i + 1]*cr4;
254 static void passf5(int ido, int l1, const Treal cc[], Treal ch[],
255 const Treal wa1[], const Treal wa2[], const Treal wa3[], const Treal wa4[], int isign)
256 /* isign == -1 for forward transform and +1 for backward transform */
258 static const Treal tr11 = 0.309016994374947;
259 static const Treal ti11 = 0.951056516295154;
260 static const Treal tr12 = -0.809016994374947;
261 static const Treal ti12 = 0.587785252292473;
263 Treal ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
264 ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
266 for (k = 1; k <= l1; ++k) {
267 ac = (5*k - 4)*ido + 1;
268 ti5 = ref(cc,ac) - ref(cc,ac + 3*ido);
269 ti2 = ref(cc,ac) + ref(cc,ac + 3*ido);
270 ti4 = ref(cc,ac + ido) - ref(cc,ac + 2*ido);
271 ti3 = ref(cc,ac + ido) + ref(cc,ac + 2*ido);
272 tr5 = ref(cc,ac - 1) - ref(cc,ac + 3*ido - 1);
273 tr2 = ref(cc,ac - 1) + ref(cc,ac + 3*ido - 1);
274 tr4 = ref(cc,ac + ido - 1) - ref(cc,ac + 2*ido - 1);
275 tr3 = ref(cc,ac + ido - 1) + ref(cc,ac + 2*ido - 1);
277 ch[ah] = ref(cc,ac - ido - 1) + tr2 + tr3;
278 ch[ah + 1] = ref(cc,ac - ido) + ti2 + ti3;
279 cr2 = ref(cc,ac - ido - 1) + tr11*tr2 + tr12*tr3;
280 ci2 = ref(cc,ac - ido) + tr11*ti2 + tr12*ti3;
281 cr3 = ref(cc,ac - ido - 1) + tr12*tr2 + tr11*tr3;
282 ci3 = ref(cc,ac - ido) + tr12*ti2 + tr11*ti3;
283 cr5 = isign*(ti11*tr5 + ti12*tr4);
284 ci5 = isign*(ti11*ti5 + ti12*ti4);
285 cr4 = isign*(ti12*tr5 - ti11*tr4);
286 ci4 = isign*(ti12*ti5 - ti11*ti4);
287 ch[ah + l1*ido] = cr2 - ci5;
288 ch[ah + 4*l1*ido] = cr2 + ci5;
289 ch[ah + l1*ido + 1] = ci2 + cr5;
290 ch[ah + 2*l1*ido + 1] = ci3 + cr4;
291 ch[ah + 2*l1*ido] = cr3 - ci4;
292 ch[ah + 3*l1*ido] = cr3 + ci4;
293 ch[ah + 3*l1*ido + 1] = ci3 - cr4;
294 ch[ah + 4*l1*ido + 1] = ci2 - cr5;
297 for (k=1; k<=l1; k++) {
298 for (i=0; i<ido-1; i+=2) {
299 ac = i + 1 + (k*5 - 4)*ido;
300 ti5 = ref(cc,ac) - ref(cc,ac + 3*ido);
301 ti2 = ref(cc,ac) + ref(cc,ac + 3*ido);
302 ti4 = ref(cc,ac + ido) - ref(cc,ac + 2*ido);
303 ti3 = ref(cc,ac + ido) + ref(cc,ac + 2*ido);
304 tr5 = ref(cc,ac - 1) - ref(cc,ac + 3*ido - 1);
305 tr2 = ref(cc,ac - 1) + ref(cc,ac + 3*ido - 1);
306 tr4 = ref(cc,ac + ido - 1) - ref(cc,ac + 2*ido - 1);
307 tr3 = ref(cc,ac + ido - 1) + ref(cc,ac + 2*ido - 1);
308 ah = i + (k - 1)*ido;
309 ch[ah] = ref(cc,ac - ido - 1) + tr2 + tr3;
310 ch[ah + 1] = ref(cc,ac - ido) + ti2 + ti3;
311 cr2 = ref(cc,ac - ido - 1) + tr11*tr2 + tr12*tr3;
313 ci2 = ref(cc,ac - ido) + tr11*ti2 + tr12*ti3;
314 cr3 = ref(cc,ac - ido - 1) + tr12*tr2 + tr11*tr3;
316 ci3 = ref(cc,ac - ido) + tr12*ti2 + tr11*ti3;
317 cr5 = isign*(ti11*tr5 + ti12*tr4);
318 ci5 = isign*(ti11*ti5 + ti12*ti4);
319 cr4 = isign*(ti12*tr5 - ti11*tr4);
320 ci4 = isign*(ti12*ti5 - ti11*ti4);
329 ch[ah + l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2;
330 ch[ah + l1*ido + 1] = wa1[i]*di2 + isign*wa1[i+1]*dr2;
331 ch[ah + 2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3;
332 ch[ah + 2*l1*ido + 1] = wa2[i]*di3 + isign*wa2[i+1]*dr3;
333 ch[ah + 3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4;
334 ch[ah + 3*l1*ido + 1] = wa3[i]*di4 + isign*wa3[i+1]*dr4;
335 ch[ah + 4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5;
336 ch[ah + 4*l1*ido + 1] = wa4[i]*di5 + isign*wa4[i+1]*dr5;
343 static void passf(int *nac, int ido, int ip, int l1, int idl1,
344 Treal cc[], Treal ch[],
345 const Treal wa[], int isign)
346 /* isign is -1 for forward transform and +1 for backward transform */
348 int idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, idj, idl, inc,idp;
356 for (j=1; j<ipph; j++) {
358 for (k=0; k<l1; k++) {
359 for (i=0; i<ido; i++) {
360 ch[i + (k + j*l1)*ido] =
361 ref(cc,i + (j + k*ip)*ido) + ref(cc,i + (jc + k*ip)*ido);
362 ch[i + (k + jc*l1)*ido] =
363 ref(cc,i + (j + k*ip)*ido) - ref(cc,i + (jc + k*ip)*ido);
368 for (i=0; i<ido; i++)
369 ch[i + k*ido] = ref(cc,i + k*ip*ido);
371 for (j=1; j<ipph; j++) {
373 for (i=0; i<ido; i++) {
374 for (k=0; k<l1; k++) {
375 ch[i + (k + j*l1)*ido] = ref(cc,i + (j + k*ip)*ido) + ref(cc,i + (jc + k*
377 ch[i + (k + jc*l1)*ido] = ref(cc,i + (j + k*ip)*ido) - ref(cc,i + (jc + k*
382 for (i=0; i<ido; i++)
384 ch[i + k*ido] = ref(cc,i + k*ip*ido);
389 for (l=1; l<ipph; l++) {
392 for (ik=0; ik<idl1; ik++) {
393 cc[ik + l*idl1] = ch[ik] + wa[idl - 2]*ch[ik + idl1];
394 cc[ik + lc*idl1] = isign*wa[idl-1]*ch[ik + (ip-1)*idl1];
398 for (j=2; j<ipph; j++) {
401 if (idlj > idp) idlj -= idp;
404 for (ik=0; ik<idl1; ik++) {
405 cc[ik + l*idl1] += war*ch[ik + j*idl1];
406 cc[ik + lc*idl1] += isign*wai*ch[ik + jc*idl1];
410 for (j=1; j<ipph; j++)
411 for (ik=0; ik<idl1; ik++)
412 ch[ik] += ch[ik + j*idl1];
413 for (j=1; j<ipph; j++) {
415 for (ik=1; ik<idl1; ik+=2) {
416 ch[ik - 1 + j*idl1] = cc[ik - 1 + j*idl1] - cc[ik + jc*idl1];
417 ch[ik - 1 + jc*idl1] = cc[ik - 1 + j*idl1] + cc[ik + jc*idl1];
418 ch[ik + j*idl1] = cc[ik + j*idl1] + cc[ik - 1 + jc*idl1];
419 ch[ik + jc*idl1] = cc[ik + j*idl1] - cc[ik - 1 + jc*idl1];
423 if (ido == 2) return;
425 for (ik=0; ik<idl1; ik++)
427 for (j=1; j<ip; j++) {
428 for (k=0; k<l1; k++) {
429 cc[(k + j*l1)*ido + 0] = ch[(k + j*l1)*ido + 0];
430 cc[(k + j*l1)*ido + 1] = ch[(k + j*l1)*ido + 1];
435 for (j=1; j<ip; j++) {
437 for (i=3; i<ido; i+=2) {
439 for (k=0; k<l1; k++) {
440 cc[i - 1 + (k + j*l1)*ido] =
441 wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
442 isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
443 cc[i + (k + j*l1)*ido] =
444 wa[idij - 2]*ch[i + (k + j*l1)*ido] +
445 isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
451 for (j=1; j<ip; j++) {
453 for (k = 0; k < l1; k++) {
455 for (i=3; i<ido; i+=2) {
457 cc[i - 1 + (k + j*l1)*ido] =
458 wa[idij - 2]*ch[i - 1 + (k + j*l1)*ido] -
459 isign*wa[idij-1]*ch[i + (k + j*l1)*ido];
460 cc[i + (k + j*l1)*ido] =
461 wa[idij - 2]*ch[i + (k + j*l1)*ido] +
462 isign*wa[idij-1]*ch[i - 1 + (k + j*l1)*ido];
470 /* ----------------------------------------------------------------------
471 radf2,radb2, radf3,radb3, radf4,radb4, radf5,radb5, radfg,radbg.
472 Treal FFT passes fwd and bwd.
473 ---------------------------------------------------------------------- */
475 static void radf2(int ido, int l1, const Treal cc[], Treal ch[], const Treal wa1[])
479 for (k=0; k<l1; k++) {
481 ref(cc,k*ido) + ref(cc,(k + l1)*ido);
482 ch[(2*k+1)*ido + ido-1] =
483 ref(cc,k*ido) - ref(cc,(k + l1)*ido);
487 for (k=0; k<l1; k++) {
488 for (i=2; i<ido; i+=2) {
490 tr2 = wa1[i - 2]*ref(cc, i-1 + (k + l1)*ido) + wa1[i - 1]*ref(cc, i + (k + l1)*ido);
491 ti2 = wa1[i - 2]*ref(cc, i + (k + l1)*ido) - wa1[i - 1]*ref(cc, i-1 + (k + l1)*ido);
492 ch[i + 2*k*ido] = ref(cc,i + k*ido) + ti2;
493 ch[ic + (2*k+1)*ido] = ti2 - ref(cc,i + k*ido);
494 ch[i - 1 + 2*k*ido] = ref(cc,i - 1 + k*ido) + tr2;
495 ch[ic - 1 + (2*k+1)*ido] = ref(cc,i - 1 + k*ido) - tr2;
498 if (ido % 2 == 1) return;
500 for (k=0; k<l1; k++) {
501 ch[(2*k+1)*ido] = -ref(cc,ido-1 + (k + l1)*ido);
502 ch[ido-1 + 2*k*ido] = ref(cc,ido-1 + k*ido);
507 static void radb2(int ido, int l1, const Treal cc[], Treal ch[], const Treal wa1[])
511 for (k=0; k<l1; k++) {
513 ref(cc,2*k*ido) + ref(cc,ido-1 + (2*k+1)*ido);
515 ref(cc,2*k*ido) - ref(cc,ido-1 + (2*k+1)*ido);
519 for (k = 0; k < l1; ++k) {
520 for (i = 2; i < ido; i += 2) {
523 ref(cc,i-1 + 2*k*ido) + ref(cc,ic-1 + (2*k+1)*ido);
524 tr2 = ref(cc,i-1 + 2*k*ido) - ref(cc,ic-1 + (2*k+1)*ido);
526 ref(cc,i + 2*k*ido) - ref(cc,ic + (2*k+1)*ido);
527 ti2 = ref(cc,i + (2*k)*ido) + ref(cc,ic + (2*k+1)*ido);
528 ch[i-1 + (k + l1)*ido] =
529 wa1[i - 2]*tr2 - wa1[i - 1]*ti2;
530 ch[i + (k + l1)*ido] =
531 wa1[i - 2]*ti2 + wa1[i - 1]*tr2;
534 if (ido % 2 == 1) return;
536 for (k = 0; k < l1; k++) {
537 ch[ido-1 + k*ido] = 2*ref(cc,ido-1 + 2*k*ido);
538 ch[ido-1 + (k + l1)*ido] = -2*ref(cc,(2*k+1)*ido);
543 static void radf3(int ido, int l1, const Treal cc[], Treal ch[],
544 const Treal wa1[], const Treal wa2[])
546 static const Treal taur = -0.5;
547 static const Treal taui = 0.866025403784439;
549 Treal ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
550 for (k=0; k<l1; k++) {
551 cr2 = ref(cc,(k + l1)*ido) + ref(cc,(k + 2*l1)*ido);
552 ch[3*k*ido] = ref(cc,k*ido) + cr2;
553 ch[(3*k+2)*ido] = taui*(ref(cc,(k + l1*2)*ido) - ref(cc,(k + l1)*ido));
554 ch[ido-1 + (3*k + 1)*ido] = ref(cc,k*ido) + taur*cr2;
556 if (ido == 1) return;
557 for (k=0; k<l1; k++) {
558 for (i=2; i<ido; i+=2) {
560 dr2 = wa1[i - 2]*ref(cc,i - 1 + (k + l1)*ido) +
561 wa1[i - 1]*ref(cc,i + (k + l1)*ido);
562 di2 = wa1[i - 2]*ref(cc,i + (k + l1)*ido) - wa1[i - 1]*ref(cc,i - 1 + (k + l1)*ido);
563 dr3 = wa2[i - 2]*ref(cc,i - 1 + (k + l1*2)*ido) + wa2[i - 1]*ref(cc,i + (k + l1*2)*ido);
564 di3 = wa2[i - 2]*ref(cc,i + (k + l1*2)*ido) - wa2[i - 1]*ref(cc,i - 1 + (k + l1*2)*ido);
567 ch[i - 1 + 3*k*ido] = ref(cc,i - 1 + k*ido) + cr2;
568 ch[i + 3*k*ido] = ref(cc,i + k*ido) + ci2;
569 tr2 = ref(cc,i - 1 + k*ido) + taur*cr2;
570 ti2 = ref(cc,i + k*ido) + taur*ci2;
571 tr3 = taui*(di2 - di3);
572 ti3 = taui*(dr3 - dr2);
573 ch[i - 1 + (3*k + 2)*ido] = tr2 + tr3;
574 ch[ic - 1 + (3*k + 1)*ido] = tr2 - tr3;
575 ch[i + (3*k + 2)*ido] = ti2 + ti3;
576 ch[ic + (3*k + 1)*ido] = ti3 - ti2;
582 static void radb3(int ido, int l1, const Treal cc[], Treal ch[],
583 const Treal wa1[], const Treal wa2[])
585 static const Treal taur = -0.5;
586 static const Treal taui = 0.866025403784439;
588 Treal ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
589 for (k=0; k<l1; k++) {
590 tr2 = 2*ref(cc,ido-1 + (3*k + 1)*ido);
591 cr2 = ref(cc,3*k*ido) + taur*tr2;
592 ch[k*ido] = ref(cc,3*k*ido) + tr2;
593 ci3 = 2*taui*ref(cc,(3*k + 2)*ido);
594 ch[(k + l1)*ido] = cr2 - ci3;
595 ch[(k + 2*l1)*ido] = cr2 + ci3;
597 if (ido == 1) return;
598 for (k=0; k<l1; k++) {
599 for (i=2; i<ido; i+=2) {
601 tr2 = ref(cc,i - 1 + (3*k + 2)*ido) + ref(cc,ic - 1 + (3*k + 1)*ido);
602 cr2 = ref(cc,i - 1 + 3*k*ido) + taur*tr2;
603 ch[i - 1 + k*ido] = ref(cc,i - 1 + 3*k*ido) + tr2;
604 ti2 = ref(cc,i + (3*k + 2)*ido) - ref(cc,ic + (3*k + 1)*ido);
605 ci2 = ref(cc,i + 3*k*ido) + taur*ti2;
606 ch[i + k*ido] = ref(cc,i + 3*k*ido) + ti2;
607 cr3 = taui*(ref(cc,i - 1 + (3*k + 2)*ido) - ref(cc,ic - 1 + (3*k + 1)*ido));
608 ci3 = taui*(ref(cc,i + (3*k + 2)*ido) + ref(cc,ic + (3*k + 1)*ido));
613 ch[i - 1 + (k + l1)*ido] = wa1[i - 2]*dr2 - wa1[i - 1]*di2;
614 ch[i + (k + l1)*ido] = wa1[i - 2]*di2 + wa1[i - 1]*dr2;
615 ch[i - 1 + (k + 2*l1)*ido] = wa2[i - 2]*dr3 - wa2[i - 1]*di3;
616 ch[i + (k + 2*l1)*ido] = wa2[i - 2]*di3 + wa2[i - 1]*dr3;
622 static void radf4(int ido, int l1, const Treal cc[], Treal ch[],
623 const Treal wa1[], const Treal wa2[], const Treal wa3[])
625 static const Treal hsqt2 = 0.7071067811865475;
627 Treal ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
628 for (k=0; k<l1; k++) {
629 tr1 = ref(cc,(k + l1)*ido) + ref(cc,(k + 3*l1)*ido);
630 tr2 = ref(cc,k*ido) + ref(cc,(k + 2*l1)*ido);
631 ch[4*k*ido] = tr1 + tr2;
632 ch[ido-1 + (4*k + 3)*ido] = tr2 - tr1;
633 ch[ido-1 + (4*k + 1)*ido] = ref(cc,k*ido) - ref(cc,(k + 2*l1)*ido);
634 ch[(4*k + 2)*ido] = ref(cc,(k + 3*l1)*ido) - ref(cc,(k + l1)*ido);
638 for (k=0; k<l1; k++) {
639 for (i=2; i<ido; i += 2) {
641 cr2 = wa1[i - 2]*ref(cc,i - 1 + (k + l1)*ido) + wa1[i - 1]*ref(cc,i + (k + l1)*ido);
642 ci2 = wa1[i - 2]*ref(cc,i + (k + l1)*ido) - wa1[i - 1]*ref(cc,i - 1 + (k + l1)*ido);
643 cr3 = wa2[i - 2]*ref(cc,i - 1 + (k + 2*l1)*ido) + wa2[i - 1]*ref(cc,i + (k + 2*l1)*
645 ci3 = wa2[i - 2]*ref(cc,i + (k + 2*l1)*ido) - wa2[i - 1]*ref(cc,i - 1 + (k + 2*l1)*
647 cr4 = wa3[i - 2]*ref(cc,i - 1 + (k + 3*l1)*ido) + wa3[i - 1]*ref(cc,i + (k + 3*l1)*
649 ci4 = wa3[i - 2]*ref(cc,i + (k + 3*l1)*ido) - wa3[i - 1]*ref(cc,i - 1 + (k + 3*l1)*
655 ti2 = ref(cc,i + k*ido) + ci3;
656 ti3 = ref(cc,i + k*ido) - ci3;
657 tr2 = ref(cc,i - 1 + k*ido) + cr3;
658 tr3 = ref(cc,i - 1 + k*ido) - cr3;
659 ch[i - 1 + 4*k*ido] = tr1 + tr2;
660 ch[ic - 1 + (4*k + 3)*ido] = tr2 - tr1;
661 ch[i + 4*k*ido] = ti1 + ti2;
662 ch[ic + (4*k + 3)*ido] = ti1 - ti2;
663 ch[i - 1 + (4*k + 2)*ido] = ti4 + tr3;
664 ch[ic - 1 + (4*k + 1)*ido] = tr3 - ti4;
665 ch[i + (4*k + 2)*ido] = tr4 + ti3;
666 ch[ic + (4*k + 1)*ido] = tr4 - ti3;
669 if (ido % 2 == 1) return;
671 for (k=0; k<l1; k++) {
672 ti1 = -hsqt2*(ref(cc,ido-1 + (k + l1)*ido) + ref(cc,ido-1 + (k + 3*l1)*ido));
673 tr1 = hsqt2*(ref(cc,ido-1 + (k + l1)*ido) - ref(cc,ido-1 + (k + 3*l1)*ido));
674 ch[ido-1 + 4*k*ido] = tr1 + ref(cc,ido-1 + k*ido);
675 ch[ido-1 + (4*k + 2)*ido] = ref(cc,ido-1 + k*ido) - tr1;
676 ch[(4*k + 1)*ido] = ti1 - ref(cc,ido-1 + (k + 2*l1)*ido);
677 ch[(4*k + 3)*ido] = ti1 + ref(cc,ido-1 + (k + 2*l1)*ido);
682 static void radb4(int ido, int l1, const Treal cc[], Treal ch[],
683 const Treal wa1[], const Treal wa2[], const Treal wa3[])
685 static const Treal sqrt2 = 1.414213562373095;
687 Treal ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
688 for (k = 0; k < l1; k++) {
689 tr1 = ref(cc,4*k*ido) - ref(cc,ido-1 + (4*k + 3)*ido);
690 tr2 = ref(cc,4*k*ido) + ref(cc,ido-1 + (4*k + 3)*ido);
691 tr3 = ref(cc,ido-1 + (4*k + 1)*ido) + ref(cc,ido-1 + (4*k + 1)*ido);
692 tr4 = ref(cc,(4*k + 2)*ido) + ref(cc,(4*k + 2)*ido);
693 ch[k*ido] = tr2 + tr3;
694 ch[(k + l1)*ido] = tr1 - tr4;
695 ch[(k + 2*l1)*ido] = tr2 - tr3;
696 ch[(k + 3*l1)*ido] = tr1 + tr4;
700 for (k = 0; k < l1; ++k) {
701 for (i = 2; i < ido; i += 2) {
703 ti1 = ref(cc,i + 4*k*ido) + ref(cc,ic + (4*k + 3)*ido);
704 ti2 = ref(cc,i + 4*k*ido) - ref(cc,ic + (4*k + 3)*ido);
705 ti3 = ref(cc,i + (4*k + 2)*ido) - ref(cc,ic + (4*k + 1)*ido);
706 tr4 = ref(cc,i + (4*k + 2)*ido) + ref(cc,ic + (4*k + 1)*ido);
707 tr1 = ref(cc,i - 1 + 4*k*ido) - ref(cc,ic - 1 + (4*k + 3)*ido);
708 tr2 = ref(cc,i - 1 + 4*k*ido) + ref(cc,ic - 1 + (4*k + 3)*ido);
709 ti4 = ref(cc,i - 1 + (4*k + 2)*ido) - ref(cc,ic - 1 + (4*k + 1)*ido);
710 tr3 = ref(cc,i - 1 + (4*k + 2)*ido) + ref(cc,ic - 1 + (4*k + 1)*ido);
711 ch[i - 1 + k*ido] = tr2 + tr3;
713 ch[i + k*ido] = ti2 + ti3;
719 ch[i - 1 + (k + l1)*ido] = wa1[i - 2]*cr2 - wa1[i - 1]*ci2;
720 ch[i + (k + l1)*ido] = wa1[i - 2]*ci2 + wa1[i - 1]*cr2;
721 ch[i - 1 + (k + 2*l1)*ido] = wa2[i - 2]*cr3 - wa2[i - 1]*ci3;
722 ch[i + (k + 2*l1)*ido] = wa2[i - 2]*ci3 + wa2[i - 1]*cr3;
723 ch[i - 1 + (k + 3*l1)*ido] = wa3[i - 2]*cr4 - wa3[i - 1]*ci4;
724 ch[i + (k + 3*l1)*ido] = wa3[i - 2]*ci4 + wa3[i - 1]*cr4;
727 if (ido % 2 == 1) return;
729 for (k = 0; k < l1; k++) {
730 ti1 = ref(cc,(4*k + 1)*ido) + ref(cc,(4*k + 3)*ido);
731 ti2 = ref(cc,(4*k + 3)*ido) - ref(cc,(4*k + 1)*ido);
732 tr1 = ref(cc,ido-1 + 4*k*ido) - ref(cc,ido-1 + (4*k + 2)*ido);
733 tr2 = ref(cc,ido-1 + 4*k*ido) + ref(cc,ido-1 + (4*k + 2)*ido);
734 ch[ido-1 + k*ido] = tr2 + tr2;
735 ch[ido-1 + (k + l1)*ido] = sqrt2*(tr1 - ti1);
736 ch[ido-1 + (k + 2*l1)*ido] = ti2 + ti2;
737 ch[ido-1 + (k + 3*l1)*ido] = -sqrt2*(tr1 + ti1);
742 static void radf5(int ido, int l1, const Treal cc[], Treal ch[],
743 const Treal wa1[], const Treal wa2[], const Treal wa3[], const Treal wa4[])
745 static const Treal tr11 = 0.309016994374947;
746 static const Treal ti11 = 0.951056516295154;
747 static const Treal tr12 = -0.809016994374947;
748 static const Treal ti12 = 0.587785252292473;
750 Treal ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3, dr4, dr5,
751 cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
752 for (k = 0; k < l1; k++) {
753 cr2 = ref(cc,(k + 4*l1)*ido) + ref(cc,(k + l1)*ido);
754 ci5 = ref(cc,(k + 4*l1)*ido) - ref(cc,(k + l1)*ido);
755 cr3 = ref(cc,(k + 3*l1)*ido) + ref(cc,(k + 2*l1)*ido);
756 ci4 = ref(cc,(k + 3*l1)*ido) - ref(cc,(k + 2*l1)*ido);
757 ch[5*k*ido] = ref(cc,k*ido) + cr2 + cr3;
758 ch[ido-1 + (5*k + 1)*ido] = ref(cc,k*ido) + tr11*cr2 + tr12*cr3;
759 ch[(5*k + 2)*ido] = ti11*ci5 + ti12*ci4;
760 ch[ido-1 + (5*k + 3)*ido] = ref(cc,k*ido) + tr12*cr2 + tr11*cr3;
761 ch[(5*k + 4)*ido] = ti12*ci5 - ti11*ci4;
763 if (ido == 1) return;
764 for (k = 0; k < l1; ++k) {
765 for (i = 2; i < ido; i += 2) {
767 dr2 = wa1[i - 2]*ref(cc,i - 1 + (k + l1)*ido) + wa1[i - 1]*ref(cc,i + (k + l1)*ido);
768 di2 = wa1[i - 2]*ref(cc,i + (k + l1)*ido) - wa1[i - 1]*ref(cc,i - 1 + (k + l1)*ido);
769 dr3 = wa2[i - 2]*ref(cc,i - 1 + (k + 2*l1)*ido) + wa2[i - 1]*ref(cc,i + (k + 2*l1)*ido);
770 di3 = wa2[i - 2]*ref(cc,i + (k + 2*l1)*ido) - wa2[i - 1]*ref(cc,i - 1 + (k + 2*l1)*ido);
771 dr4 = wa3[i - 2]*ref(cc,i - 1 + (k + 3*l1)*ido) + wa3[i - 1]*ref(cc,i + (k + 3*l1)*ido);
772 di4 = wa3[i - 2]*ref(cc,i + (k + 3*l1)*ido) - wa3[i - 1]*ref(cc,i - 1 + (k + 3*l1)*ido);
773 dr5 = wa4[i - 2]*ref(cc,i - 1 + (k + 4*l1)*ido) + wa4[i - 1]*ref(cc,i + (k + 4*l1)*ido);
774 di5 = wa4[i - 2]*ref(cc,i + (k + 4*l1)*ido) - wa4[i - 1]*ref(cc,i - 1 + (k + 4*l1)*ido);
783 ch[i - 1 + 5*k*ido] = ref(cc,i - 1 + k*ido) + cr2 + cr3;
784 ch[i + 5*k*ido] = ref(cc,i + k*ido) + ci2 + ci3;
785 tr2 = ref(cc,i - 1 + k*ido) + tr11*cr2 + tr12*cr3;
786 ti2 = ref(cc,i + k*ido) + tr11*ci2 + tr12*ci3;
787 tr3 = ref(cc,i - 1 + k*ido) + tr12*cr2 + tr11*cr3;
788 ti3 = ref(cc,i + k*ido) + tr12*ci2 + tr11*ci3;
789 tr5 = ti11*cr5 + ti12*cr4;
790 ti5 = ti11*ci5 + ti12*ci4;
791 tr4 = ti12*cr5 - ti11*cr4;
792 ti4 = ti12*ci5 - ti11*ci4;
793 ch[i - 1 + (5*k + 2)*ido] = tr2 + tr5;
794 ch[ic - 1 + (5*k + 1)*ido] = tr2 - tr5;
795 ch[i + (5*k + 2)*ido] = ti2 + ti5;
796 ch[ic + (5*k + 1)*ido] = ti5 - ti2;
797 ch[i - 1 + (5*k + 4)*ido] = tr3 + tr4;
798 ch[ic - 1 + (5*k + 3)*ido] = tr3 - tr4;
799 ch[i + (5*k + 4)*ido] = ti3 + ti4;
800 ch[ic + (5*k + 3)*ido] = ti4 - ti3;
806 static void radb5(int ido, int l1, const Treal cc[], Treal ch[],
807 const Treal wa1[], const Treal wa2[], const Treal wa3[], const Treal wa4[])
809 static const Treal tr11 = 0.309016994374947;
810 static const Treal ti11 = 0.951056516295154;
811 static const Treal tr12 = -0.809016994374947;
812 static const Treal ti12 = 0.587785252292473;
814 Treal ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, ti2, ti3,
815 ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
816 for (k = 0; k < l1; k++) {
817 ti5 = 2*ref(cc,(5*k + 2)*ido);
818 ti4 = 2*ref(cc,(5*k + 4)*ido);
819 tr2 = 2*ref(cc,ido-1 + (5*k + 1)*ido);
820 tr3 = 2*ref(cc,ido-1 + (5*k + 3)*ido);
821 ch[k*ido] = ref(cc,5*k*ido) + tr2 + tr3;
822 cr2 = ref(cc,5*k*ido) + tr11*tr2 + tr12*tr3;
823 cr3 = ref(cc,5*k*ido) + tr12*tr2 + tr11*tr3;
824 ci5 = ti11*ti5 + ti12*ti4;
825 ci4 = ti12*ti5 - ti11*ti4;
826 ch[(k + l1)*ido] = cr2 - ci5;
827 ch[(k + 2*l1)*ido] = cr3 - ci4;
828 ch[(k + 3*l1)*ido] = cr3 + ci4;
829 ch[(k + 4*l1)*ido] = cr2 + ci5;
831 if (ido == 1) return;
832 for (k = 0; k < l1; ++k) {
833 for (i = 2; i < ido; i += 2) {
835 ti5 = ref(cc,i + (5*k + 2)*ido) + ref(cc,ic + (5*k + 1)*ido);
836 ti2 = ref(cc,i + (5*k + 2)*ido) - ref(cc,ic + (5*k + 1)*ido);
837 ti4 = ref(cc,i + (5*k + 4)*ido) + ref(cc,ic + (5*k + 3)*ido);
838 ti3 = ref(cc,i + (5*k + 4)*ido) - ref(cc,ic + (5*k + 3)*ido);
839 tr5 = ref(cc,i - 1 + (5*k + 2)*ido) - ref(cc,ic - 1 + (5*k + 1)*ido);
840 tr2 = ref(cc,i - 1 + (5*k + 2)*ido) + ref(cc,ic - 1 + (5*k + 1)*ido);
841 tr4 = ref(cc,i - 1 + (5*k + 4)*ido) - ref(cc,ic - 1 + (5*k + 3)*ido);
842 tr3 = ref(cc,i - 1 + (5*k + 4)*ido) + ref(cc,ic - 1 + (5*k + 3)*ido);
843 ch[i - 1 + k*ido] = ref(cc,i - 1 + 5*k*ido) + tr2 + tr3;
844 ch[i + k*ido] = ref(cc,i + 5*k*ido) + ti2 + ti3;
845 cr2 = ref(cc,i - 1 + 5*k*ido) + tr11*tr2 + tr12*tr3;
847 ci2 = ref(cc,i + 5*k*ido) + tr11*ti2 + tr12*ti3;
848 cr3 = ref(cc,i - 1 + 5*k*ido) + tr12*tr2 + tr11*tr3;
850 ci3 = ref(cc,i + 5*k*ido) + tr12*ti2 + tr11*ti3;
851 cr5 = ti11*tr5 + ti12*tr4;
852 ci5 = ti11*ti5 + ti12*ti4;
853 cr4 = ti12*tr5 - ti11*tr4;
854 ci4 = ti12*ti5 - ti11*ti4;
863 ch[i - 1 + (k + l1)*ido] = wa1[i - 2]*dr2 - wa1[i - 1]*di2;
864 ch[i + (k + l1)*ido] = wa1[i - 2]*di2 + wa1[i - 1]*dr2;
865 ch[i - 1 + (k + 2*l1)*ido] = wa2[i - 2]*dr3 - wa2[i - 1]*di3;
866 ch[i + (k + 2*l1)*ido] = wa2[i - 2]*di3 + wa2[i - 1]*dr3;
867 ch[i - 1 + (k + 3*l1)*ido] = wa3[i - 2]*dr4 - wa3[i - 1]*di4;
868 ch[i + (k + 3*l1)*ido] = wa3[i - 2]*di4 + wa3[i - 1]*dr4;
869 ch[i - 1 + (k + 4*l1)*ido] = wa4[i - 2]*dr5 - wa4[i - 1]*di5;
870 ch[i + (k + 4*l1)*ido] = wa4[i - 2]*di5 + wa4[i - 1]*dr5;
876 static void radfg(int ido, int ip, int l1, int idl1,
877 Treal cc[], Treal ch[], const Treal wa[])
879 static const Treal twopi = 6.28318530717959;
880 int idij, ipph, i, j, k, l, j2, ic, jc, lc, ik, is, nbd;
881 Treal dc2, ai1, ai2, ar1, ar2, ds2, dcp, arg, dsp, ar1h, ar2h;
888 for (ik=0; ik<idl1; ik++) ch[ik] = cc[ik];
891 ch[(k + j*l1)*ido] = cc[(k + j*l1)*ido];
894 for (j=1; j<ip; j++) {
897 for (i=2; i<ido; i+=2) {
899 for (k=0; k<l1; k++) {
900 ch[i - 1 + (k + j*l1)*ido] =
901 wa[idij - 1]*cc[i - 1 + (k + j*l1)*ido] + wa[idij]*cc[i + (k + j*l1)*ido];
902 ch[i + (k + j*l1)*ido] =
903 wa[idij - 1]*cc[i + (k + j*l1)*ido] - wa[idij]*cc[i - 1 + (k + j*l1)*ido];
909 for (j=1; j<ip; j++) {
911 for (k=0; k<l1; k++) {
913 for (i=2; i<ido; i+=2) {
915 ch[i - 1 + (k + j*l1)*ido] =
916 wa[idij - 1]*cc[i - 1 + (k + j*l1)*ido] + wa[idij]*cc[i + (k + j*l1)*ido];
917 ch[i + (k + j*l1)*ido] =
918 wa[idij - 1]*cc[i + (k + j*l1)*ido] - wa[idij]*cc[i - 1 + (k + j*l1)*ido];
924 for (j=1; j<ipph; j++) {
926 for (k=0; k<l1; k++) {
927 for (i=2; i<ido; i+=2) {
928 cc[i - 1 + (k + j*l1)*ido] = ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
929 cc[i - 1 + (k + jc*l1)*ido] = ch[i + (k + j*l1)*ido] - ch[i + (k + jc*l1)*ido];
930 cc[i + (k + j*l1)*ido] = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
931 cc[i + (k + jc*l1)*ido] = ch[i - 1 + (k + jc*l1)*ido] - ch[i - 1 + (k + j*l1)*ido];
936 for (j=1; j<ipph; j++) {
938 for (i=2; i<ido; i+=2) {
939 for (k=0; k<l1; k++) {
940 cc[i - 1 + (k + j*l1)*ido] =
941 ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
942 cc[i - 1 + (k + jc*l1)*ido] = ch[i + (k + j*l1)*ido] - ch[i + (k + jc*l1)*ido];
943 cc[i + (k + j*l1)*ido] = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
944 cc[i + (k + jc*l1)*ido] = ch[i - 1 + (k + jc*l1)*ido] - ch[i - 1 + (k + j*l1)*ido];
949 } else { /* now ido == 1 */
950 for (ik=0; ik<idl1; ik++) cc[ik] = ch[ik];
952 for (j=1; j<ipph; j++) {
954 for (k=0; k<l1; k++) {
955 cc[(k + j*l1)*ido] = ch[(k + j*l1)*ido] + ch[(k + jc*l1)*ido];
956 cc[(k + jc*l1)*ido] = ch[(k + jc*l1)*ido] - ch[(k + j*l1)*ido];
962 for (l=1; l<ipph; l++) {
964 ar1h = dcp*ar1 - dsp*ai1;
965 ai1 = dcp*ai1 + dsp*ar1;
967 for (ik=0; ik<idl1; ik++) {
968 ch[ik + l*idl1] = cc[ik] + ar1*cc[ik + idl1];
969 ch[ik + lc*idl1] = ai1*cc[ik + (ip-1)*idl1];
975 for (j=2; j<ipph; j++) {
977 ar2h = dc2*ar2 - ds2*ai2;
978 ai2 = dc2*ai2 + ds2*ar2;
980 for (ik=0; ik<idl1; ik++) {
981 ch[ik + l*idl1] += ar2*cc[ik + j*idl1];
982 ch[ik + lc*idl1] += ai2*cc[ik + jc*idl1];
986 for (j=1; j<ipph; j++)
987 for (ik=0; ik<idl1; ik++)
988 ch[ik] += cc[ik + j*idl1];
991 for (k=0; k<l1; k++) {
992 for (i=0; i<ido; i++) {
993 ref(cc,i + k*ip*ido) = ch[i + k*ido];
997 for (i=0; i<ido; i++) {
998 for (k=0; k<l1; k++) {
999 ref(cc,i + k*ip*ido) = ch[i + k*ido];
1003 for (j=1; j<ipph; j++) {
1006 for (k=0; k<l1; k++) {
1007 ref(cc,ido-1 + (j2 - 1 + k*ip)*ido) =
1009 ref(cc,(j2 + k*ip)*ido) =
1010 ch[(k + jc*l1)*ido];
1013 if (ido == 1) return;
1015 for (j=1; j<ipph; j++) {
1018 for (k=0; k<l1; k++) {
1019 for (i=2; i<ido; i+=2) {
1021 ref(cc,i - 1 + (j2 + k*ip)*ido) = ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
1022 ref(cc,ic - 1 + (j2 - 1 + k*ip)*ido) = ch[i - 1 + (k + j*l1)*ido] - ch[i - 1 + (k + jc*l1)*ido];
1023 ref(cc,i + (j2 + k*ip)*ido) = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
1024 ref(cc,ic + (j2 - 1 + k*ip)*ido) = ch[i + (k + jc*l1)*ido] - ch[i + (k + j*l1)*ido];
1029 for (j=1; j<ipph; j++) {
1032 for (i=2; i<ido; i+=2) {
1034 for (k=0; k<l1; k++) {
1035 ref(cc,i - 1 + (j2 + k*ip)*ido) = ch[i - 1 + (k + j*l1)*ido] + ch[i - 1 + (k + jc*l1)*ido];
1036 ref(cc,ic - 1 + (j2 - 1 + k*ip)*ido) = ch[i - 1 + (k + j*l1)*ido] - ch[i - 1 + (k + jc*l1)*ido];
1037 ref(cc,i + (j2 + k*ip)*ido) = ch[i + (k + j*l1)*ido] + ch[i + (k + jc*l1)*ido];
1038 ref(cc,ic + (j2 - 1 + k*ip)*ido) = ch[i + (k + jc*l1)*ido] - ch[i + (k + j*l1)*ido];
1046 static void radbg(int ido, int ip, int l1, int idl1,
1047 Treal cc[], Treal ch[], const Treal wa[])
1049 static const Treal twopi = 6.28318530717959;
1050 int idij, ipph, i, j, k, l, j2, ic, jc, lc, ik, is;
1051 Treal dc2, ai1, ai2, ar1, ar2, ds2;
1053 Treal dcp, arg, dsp, ar1h, ar2h;
1057 nbd = (ido - 1) / 2;
1058 ipph = (ip + 1) / 2;
1060 for (k=0; k<l1; k++) {
1061 for (i=0; i<ido; i++) {
1062 ch[i + k*ido] = ref(cc,i + k*ip*ido);
1066 for (i=0; i<ido; i++) {
1067 for (k=0; k<l1; k++) {
1068 ch[i + k*ido] = ref(cc,i + k*ip*ido);
1072 for (j=1; j<ipph; j++) {
1075 for (k=0; k<l1; k++) {
1076 ch[(k + j*l1)*ido] = ref(cc,ido-1 + (j2 - 1 + k*ip)*ido) + ref(cc,ido-1 + (j2 - 1 + k*ip)*
1078 ch[(k + jc*l1)*ido] = ref(cc,(j2 + k*ip)*ido) + ref(cc,(j2 + k*ip)*ido);
1084 for (j=1; j<ipph; j++) {
1086 for (k=0; k<l1; k++) {
1087 for (i=2; i<ido; i+=2) {
1089 ch[i - 1 + (k + j*l1)*ido] = ref(cc,i - 1 + (2*j + k*ip)*ido) + ref(cc,
1090 ic - 1 + (2*j - 1 + k*ip)*ido);
1091 ch[i - 1 + (k + jc*l1)*ido] = ref(cc,i - 1 + (2*j + k*ip)*ido) -
1092 ref(cc,ic - 1 + (2*j - 1 + k*ip)*ido);
1093 ch[i + (k + j*l1)*ido] = ref(cc,i + (2*j + k*ip)*ido) - ref(cc,ic
1094 + (2*j - 1 + k*ip)*ido);
1095 ch[i + (k + jc*l1)*ido] = ref(cc,i + (2*j + k*ip)*ido) + ref(cc,ic
1096 + (2*j - 1 + k*ip)*ido);
1101 for (j=1; j<ipph; j++) {
1103 for (i=2; i<ido; i+=2) {
1105 for (k=0; k<l1; k++) {
1106 ch[i - 1 + (k + j*l1)*ido] = ref(cc,i - 1 + (2*j + k*ip)*ido) + ref(cc,
1107 ic - 1 + (2*j - 1 + k*ip)*ido);
1108 ch[i - 1 + (k + jc*l1)*ido] = ref(cc,i - 1 + (2*j + k*ip)*ido) -
1109 ref(cc,ic - 1 + (2*j - 1 + k*ip)*ido);
1110 ch[i + (k + j*l1)*ido] = ref(cc,i + (2*j + k*ip)*ido) - ref(cc,ic
1111 + (2*j - 1 + k*ip)*ido);
1112 ch[i + (k + jc*l1)*ido] = ref(cc,i + (2*j + k*ip)*ido) + ref(cc,ic
1113 + (2*j - 1 + k*ip)*ido);
1122 for (l=1; l<ipph; l++) {
1124 ar1h = dcp*ar1 - dsp*ai1;
1125 ai1 = dcp*ai1 + dsp*ar1;
1127 for (ik=0; ik<idl1; ik++) {
1128 cc[ik + l*idl1] = ch[ik] + ar1*ch[ik + idl1];
1129 cc[ik + lc*idl1] = ai1*ch[ik + (ip-1)*idl1];
1135 for (j=2; j<ipph; j++) {
1137 ar2h = dc2*ar2 - ds2*ai2;
1138 ai2 = dc2*ai2 + ds2*ar2;
1140 for (ik=0; ik<idl1; ik++) {
1141 cc[ik + l*idl1] += ar2*ch[ik + j*idl1];
1142 cc[ik + lc*idl1] += ai2*ch[ik + jc*idl1];
1146 for (j=1; j<ipph; j++) {
1147 for (ik=0; ik<idl1; ik++) {
1148 ch[ik] += ch[ik + j*idl1];
1151 for (j=1; j<ipph; j++) {
1153 for (k=0; k<l1; k++) {
1154 ch[(k + j*l1)*ido] = cc[(k + j*l1)*ido] - cc[(k + jc*l1)*ido];
1155 ch[(k + jc*l1)*ido] = cc[(k + j*l1)*ido] + cc[(k + jc*l1)*ido];
1159 if (ido == 1) return;
1161 for (j=1; j<ipph; j++) {
1163 for (k=0; k<l1; k++) {
1164 for (i=2; i<ido; i+=2) {
1165 ch[i - 1 + (k + j*l1)*ido] = cc[i - 1 + (k + j*l1)*ido] - cc[i + (k + jc*l1)*ido];
1166 ch[i - 1 + (k + jc*l1)*ido] = cc[i - 1 + (k + j*l1)*ido] + cc[i + (k + jc*l1)*ido];
1167 ch[i + (k + j*l1)*ido] = cc[i + (k + j*l1)*ido] + cc[i - 1 + (k + jc*l1)*ido];
1168 ch[i + (k + jc*l1)*ido] = cc[i + (k + j*l1)*ido] - cc[i - 1 + (k + jc*l1)*ido];
1173 for (j=1; j<ipph; j++) {
1175 for (i=2; i<ido; i+=2) {
1176 for (k=0; k<l1; k++) {
1177 ch[i - 1 + (k + j*l1)*ido] = cc[i - 1 + (k + j*l1)*ido] - cc[i + (k + jc*l1)*ido];
1178 ch[i - 1 + (k + jc*l1)*ido] = cc[i - 1 + (k + j *l1)*ido] + cc[i + (k + jc*l1)*ido];
1179 ch[i + (k + j*l1)*ido] = cc[i + (k + j*l1)*ido] + cc[i - 1 + (k + jc*l1)*ido];
1180 ch[i + (k + jc*l1)*ido] = cc[i + (k + j*l1)*ido] - cc[i - 1 + (k + jc*l1)*ido];
1185 for (ik=0; ik<idl1; ik++) cc[ik] = ch[ik];
1186 for (j=1; j<ip; j++)
1187 for (k=0; k<l1; k++)
1188 cc[(k + j*l1)*ido] = ch[(k + j*l1)*ido];
1191 for (j=1; j<ip; j++) {
1194 for (i=2; i<ido; i+=2) {
1196 for (k=0; k<l1; k++) {
1197 cc[i - 1 + (k + j*l1)*ido] = wa[idij - 1]*ch[i - 1 + (k + j*l1)*ido] - wa[idij]*
1198 ch[i + (k + j*l1)*ido];
1199 cc[i + (k + j*l1)*ido] = wa[idij - 1]*ch[i + (k + j*l1)*ido] + wa[idij]*ch[i - 1 + (k + j*l1)*ido];
1205 for (j=1; j<ip; j++) {
1207 for (k=0; k<l1; k++) {
1209 for (i=2; i<ido; i+=2) {
1211 cc[i - 1 + (k + j*l1)*ido] = wa[idij-1]*ch[i - 1 + (k + j*l1)*ido] - wa[idij]*
1212 ch[i + (k + j*l1)*ido];
1213 cc[i + (k + j*l1)*ido] = wa[idij-1]*ch[i + (k + j*l1)*ido] + wa[idij]*ch[i - 1 + (k + j*l1)*ido];
1220 /* ----------------------------------------------------------------------
1221 cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
1222 ---------------------------------------------------------------------- */
1224 void fftpack_cfftf1(int n, Treal c[], Treal ch[], const Treal wa[], const int ifac[MAXFAC+2], int isign)
1228 int na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1;
1229 Treal *cinput, *coutput;
1234 for (k1=2; k1<=nf+1; k1++) {
1251 passf4(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], isign);
1255 passf2(idot, l1, cinput, coutput, &wa[iw], isign);
1260 passf3(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], isign);
1267 passf5(idot, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign);
1271 passf(&nac, idot, ip, l1, idl1, cinput, coutput, &wa[iw], isign);
1272 if (nac != 0) na = !na;
1275 iw += (ip - 1)*idot;
1277 if (na == 0) return;
1278 for (i=0; i<2*n; i++) c[i] = ch[i];
1282 void fftpack_cfftf(int n, Treal c[], Treal wsave[])
1288 fftpack_cfftf1(n, c, wsave, wsave+iw1, (int*)(wsave+iw2), -1);
1292 void fftpack_cfftb(int n, Treal c[], Treal wsave[])
1298 fftpack_cfftf1(n, c, wsave, wsave+iw1, (int*)(wsave+iw2), +1);
1302 static void factorize(int n, int ifac[MAXFAC+2], const int ntryh[NSPECIAL])
1303 /* Factorize n in factors in ntryh and rest. On exit,
1304 ifac[0] contains n and ifac[1] contains number of factors,
1305 the factors start from ifac[2]. */
1307 int ntry=3, i, j=0, ib, nf=0, nl=n, nq, nr;
1317 if (nr != 0) goto startloop;
1319 ifac[nf + 1] = ntry;
1321 if (ntry == 2 && nf != 1) {
1322 for (i=2; i<=nf; i++) {
1324 ifac[ib + 1] = ifac[ib];
1334 void fftpack_cffti1(int n, Treal wa[], int ifac[MAXFAC+2])
1336 static const Treal twopi = 6.28318530717959;
1337 Treal arg, argh, argld, fi;
1343 static const int ntryh[NSPECIAL] = {
1344 3,4,2,5 }; /* Do not change the order of these. */
1346 factorize(n,ifac,ntryh);
1348 argh = twopi/(Treal)n;
1351 for (k1=1; k1<=nf; k1++) {
1356 idot = ido + ido + 2;
1358 for (j=1; j<=ipm; j++) {
1365 for (ii=4; ii<=idot; ii+=2) {
1382 void fftpack_cffti(int n, Treal wsave[])
1388 fftpack_cffti1(n, wsave+iw1, (int*)(wsave+iw2));
1391 /* ----------------------------------------------------------------------
1392 rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Treal FFTs.
1393 ---------------------------------------------------------------------- */
1395 void fftpack_rfftf1(int n, Treal c[], Treal ch[], const Treal wa[], const int ifac[MAXFAC+2])
1398 int k1, l1, l2, na, kh, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
1399 Treal *cinput, *coutput;
1404 for (k1 = 1; k1 <= nf; ++k1) {
1423 radf4(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3]);
1426 radf2(ido, l1, cinput, coutput, &wa[iw]);
1430 radf3(ido, l1, cinput, coutput, &wa[iw], &wa[ix2]);
1436 radf5(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
1442 radfg(ido, ip, l1, idl1, c, ch, &wa[iw]);
1445 radfg(ido, ip, l1, idl1, ch, c, &wa[iw]);
1451 if (na == 1) return;
1452 for (i = 0; i < n; i++) c[i] = ch[i];
1456 void fftpack_rfftb1(int n, Treal c[], Treal ch[], const Treal wa[], const int ifac[MAXFAC+2])
1459 int k1, l1, l2, na, nf, ip, iw, ix2, ix3, ix4, ido, idl1;
1460 Treal *cinput, *coutput;
1465 for (k1=1; k1<=nf; k1++) {
1481 radb4(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3]);
1485 radb2(ido, l1, cinput, coutput, &wa[iw]);
1490 radb3(ido, l1, cinput, coutput, &wa[iw], &wa[ix2]);
1497 radb5(ido, l1, cinput, coutput, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4]);
1501 radbg(ido, ip, l1, idl1, cinput, coutput, &wa[iw]);
1502 if (ido == 1) na = !na;
1507 if (na == 0) return;
1508 for (i=0; i<n; i++) c[i] = ch[i];
1512 void fftpack_rfftf(int n, Treal r[], Treal wsave[])
1515 fftpack_rfftf1(n, r, wsave, wsave+n, (int*)(wsave+2*n));
1519 void fftpack_rfftb(int n, Treal r[], Treal wsave[])
1522 fftpack_rfftb1(n, r, wsave, wsave+n, (int*)(wsave+2*n));
1526 void fftpack_rffti1(int n, Treal wa[], int ifac[MAXFAC+2])
1528 static const Treal twopi = 6.28318530717959;
1529 Treal arg, argh, argld, fi;
1532 int ld, ii, nf, ip, is;
1534 static const int ntryh[NSPECIAL] = {
1535 4,2,3,5 }; /* Do not change the order of these. */
1536 factorize(n,ifac,ntryh);
1542 if (nfm1 == 0) return;
1543 for (k1 = 1; k1 <= nfm1; k1++) {
1549 for (j = 1; j <= ipm; ++j) {
1552 argld = (Treal) ld*argh;
1554 for (ii = 3; ii <= ido; ii += 2) {
1558 wa[i - 2] = cos(arg);
1559 wa[i - 1] = sin(arg);
1568 void fftpack_rffti(int n, Treal wsave[])
1571 fftpack_rffti1(n, wsave+n, (int*)(wsave+2*n));