Jamoma API  0.6.0.a19
fftsg.cpp
1 /*
2 Fast Fourier/Cosine/Sine Transform
3  dimension :one
4  data length :power of 2
5  decimation :frequency
6  radix :split-radix
7  data :inplace
8  table :use
9 functions
10  cdft: Complex Discrete Fourier Transform
11  rdft: Real Discrete Fourier Transform
12  ddct: Discrete Cosine Transform
13  ddst: Discrete Sine Transform
14  dfct: Cosine Transform of RDFT (Real Symmetric DFT)
15  dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
16 function prototypes
17  void cdft(int, int, double *, int *, double *);
18  void rdft(int, int, double *, int *, double *);
19  void ddct(int, int, double *, int *, double *);
20  void ddst(int, int, double *, int *, double *);
21  void dfct(int, double *, double *, int *, double *);
22  void dfst(int, double *, double *, int *, double *);
23 macro definitions
24  USE_CDFT_PTHREADS : default=not defined
25  CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
26  CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
27  USE_CDFT_WINTHREADS : default=not defined
28  CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
29  CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
30 
31 
32 -------- Complex DFT (Discrete Fourier Transform) --------
33  [definition]
34  <case1>
35  X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
36  <case2>
37  X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
38  (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
39  [usage]
40  <case1>
41  ip[0] = 0; // first time only
42  cdft(2*n, 1, a, ip, w);
43  <case2>
44  ip[0] = 0; // first time only
45  cdft(2*n, -1, a, ip, w);
46  [parameters]
47  2*n :data length (int)
48  n >= 1, n = power of 2
49  a[0...2*n-1] :input/output data (double *)
50  input data
51  a[2*j] = Re(x[j]),
52  a[2*j+1] = Im(x[j]), 0<=j<n
53  output data
54  a[2*k] = Re(X[k]),
55  a[2*k+1] = Im(X[k]), 0<=k<n
56  ip[0...*] :work area for bit reversal (int *)
57  length of ip >= 2+sqrt(n)
58  strictly,
59  length of ip >=
60  2+(1<<(int)(log(n+0.5)/log(2))/2).
61  ip[0],ip[1] are pointers of the cos/sin table.
62  w[0...n/2-1] :cos/sin table (double *)
63  w[],ip[] are initialized if ip[0] == 0.
64  [remark]
65  Inverse of
66  cdft(2*n, -1, a, ip, w);
67  is
68  cdft(2*n, 1, a, ip, w);
69  for (j = 0; j <= 2 * n - 1; j++) {
70  a[j] *= 1.0 / n;
71  }
72  .
73 
74 
75 -------- Real DFT / Inverse of Real DFT --------
76  [definition]
77  <case1> RDFT
78  R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
79  I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
80  <case2> IRDFT (excluding scale)
81  a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
82  sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
83  sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
84  [usage]
85  <case1>
86  ip[0] = 0; // first time only
87  rdft(n, 1, a, ip, w);
88  <case2>
89  ip[0] = 0; // first time only
90  rdft(n, -1, a, ip, w);
91  [parameters]
92  n :data length (int)
93  n >= 2, n = power of 2
94  a[0...n-1] :input/output data (double *)
95  <case1>
96  output data
97  a[2*k] = R[k], 0<=k<n/2
98  a[2*k+1] = I[k], 0<k<n/2
99  a[1] = R[n/2]
100  <case2>
101  input data
102  a[2*j] = R[j], 0<=j<n/2
103  a[2*j+1] = I[j], 0<j<n/2
104  a[1] = R[n/2]
105  ip[0...*] :work area for bit reversal (int *)
106  length of ip >= 2+sqrt(n/2)
107  strictly,
108  length of ip >=
109  2+(1<<(int)(log(n/2+0.5)/log(2))/2).
110  ip[0],ip[1] are pointers of the cos/sin table.
111  w[0...n/2-1] :cos/sin table (double *)
112  w[],ip[] are initialized if ip[0] == 0.
113  [remark]
114  Inverse of
115  rdft(n, 1, a, ip, w);
116  is
117  rdft(n, -1, a, ip, w);
118  for (j = 0; j <= n - 1; j++) {
119  a[j] *= 2.0 / n;
120  }
121  .
122 
123 
124 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
125  [definition]
126  <case1> IDCT (excluding scale)
127  C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
128  <case2> DCT
129  C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
130  [usage]
131  <case1>
132  ip[0] = 0; // first time only
133  ddct(n, 1, a, ip, w);
134  <case2>
135  ip[0] = 0; // first time only
136  ddct(n, -1, a, ip, w);
137  [parameters]
138  n :data length (int)
139  n >= 2, n = power of 2
140  a[0...n-1] :input/output data (double *)
141  output data
142  a[k] = C[k], 0<=k<n
143  ip[0...*] :work area for bit reversal (int *)
144  length of ip >= 2+sqrt(n/2)
145  strictly,
146  length of ip >=
147  2+(1<<(int)(log(n/2+0.5)/log(2))/2).
148  ip[0],ip[1] are pointers of the cos/sin table.
149  w[0...n*5/4-1] :cos/sin table (double *)
150  w[],ip[] are initialized if ip[0] == 0.
151  [remark]
152  Inverse of
153  ddct(n, -1, a, ip, w);
154  is
155  a[0] *= 0.5;
156  ddct(n, 1, a, ip, w);
157  for (j = 0; j <= n - 1; j++) {
158  a[j] *= 2.0 / n;
159  }
160  .
161 
162 
163 -------- DST (Discrete Sine Transform) / Inverse of DST --------
164  [definition]
165  <case1> IDST (excluding scale)
166  S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
167  <case2> DST
168  S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
169  [usage]
170  <case1>
171  ip[0] = 0; // first time only
172  ddst(n, 1, a, ip, w);
173  <case2>
174  ip[0] = 0; // first time only
175  ddst(n, -1, a, ip, w);
176  [parameters]
177  n :data length (int)
178  n >= 2, n = power of 2
179  a[0...n-1] :input/output data (double *)
180  <case1>
181  input data
182  a[j] = A[j], 0<j<n
183  a[0] = A[n]
184  output data
185  a[k] = S[k], 0<=k<n
186  <case2>
187  output data
188  a[k] = S[k], 0<k<n
189  a[0] = S[n]
190  ip[0...*] :work area for bit reversal (int *)
191  length of ip >= 2+sqrt(n/2)
192  strictly,
193  length of ip >=
194  2+(1<<(int)(log(n/2+0.5)/log(2))/2).
195  ip[0],ip[1] are pointers of the cos/sin table.
196  w[0...n*5/4-1] :cos/sin table (double *)
197  w[],ip[] are initialized if ip[0] == 0.
198  [remark]
199  Inverse of
200  ddst(n, -1, a, ip, w);
201  is
202  a[0] *= 0.5;
203  ddst(n, 1, a, ip, w);
204  for (j = 0; j <= n - 1; j++) {
205  a[j] *= 2.0 / n;
206  }
207  .
208 
209 
210 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
211  [definition]
212  C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
213  [usage]
214  ip[0] = 0; // first time only
215  dfct(n, a, t, ip, w);
216  [parameters]
217  n :data length - 1 (int)
218  n >= 2, n = power of 2
219  a[0...n] :input/output data (double *)
220  output data
221  a[k] = C[k], 0<=k<=n
222  t[0...n/2] :work area (double *)
223  ip[0...*] :work area for bit reversal (int *)
224  length of ip >= 2+sqrt(n/4)
225  strictly,
226  length of ip >=
227  2+(1<<(int)(log(n/4+0.5)/log(2))/2).
228  ip[0],ip[1] are pointers of the cos/sin table.
229  w[0...n*5/8-1] :cos/sin table (double *)
230  w[],ip[] are initialized if ip[0] == 0.
231  [remark]
232  Inverse of
233  a[0] *= 0.5;
234  a[n] *= 0.5;
235  dfct(n, a, t, ip, w);
236  is
237  a[0] *= 0.5;
238  a[n] *= 0.5;
239  dfct(n, a, t, ip, w);
240  for (j = 0; j <= n; j++) {
241  a[j] *= 2.0 / n;
242  }
243  .
244 
245 
246 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
247  [definition]
248  S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
249  [usage]
250  ip[0] = 0; // first time only
251  dfst(n, a, t, ip, w);
252  [parameters]
253  n :data length + 1 (int)
254  n >= 2, n = power of 2
255  a[0...n-1] :input/output data (double *)
256  output data
257  a[k] = S[k], 0<k<n
258  (a[0] is used for work area)
259  t[0...n/2-1] :work area (double *)
260  ip[0...*] :work area for bit reversal (int *)
261  length of ip >= 2+sqrt(n/4)
262  strictly,
263  length of ip >=
264  2+(1<<(int)(log(n/4+0.5)/log(2))/2).
265  ip[0],ip[1] are pointers of the cos/sin table.
266  w[0...n*5/8-1] :cos/sin table (double *)
267  w[],ip[] are initialized if ip[0] == 0.
268  [remark]
269  Inverse of
270  dfst(n, a, t, ip, w);
271  is
272  dfst(n, a, t, ip, w);
273  for (j = 1; j <= n - 1; j++) {
274  a[j] *= 2.0 / n;
275  }
276  .
277 
278 
279 Appendix :
280  The cos/sin table is recalculated when the larger table required.
281  w[] and ip[] are compatible with all routines.
282 */
283 
284 
285 extern "C" void cdft(int n, int isgn, double *a, int *ip, double *w)
286 {
287  void makewt(int nw, int *ip, double *w);
288  void cftfsub(int n, double *a, int *ip, int nw, double *w);
289  void cftbsub(int n, double *a, int *ip, int nw, double *w);
290  int nw;
291 
292  nw = ip[0];
293  if (n > (nw << 2)) {
294  nw = n >> 2;
295  makewt(nw, ip, w);
296  }
297  if (isgn >= 0) {
298  cftfsub(n, a, ip, nw, w);
299  } else {
300  cftbsub(n, a, ip, nw, w);
301  }
302 }
303 
304 
305 extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w)
306 {
307  void makewt(int nw, int *ip, double *w);
308  void makect(int nc, int *ip, double *c);
309  void cftfsub(int n, double *a, int *ip, int nw, double *w);
310  void cftbsub(int n, double *a, int *ip, int nw, double *w);
311  void rftfsub(int n, double *a, int nc, double *c);
312  void rftbsub(int n, double *a, int nc, double *c);
313  int nw, nc;
314  double xi;
315 
316  nw = ip[0];
317  if (n > (nw << 2)) {
318  nw = n >> 2;
319  makewt(nw, ip, w);
320  }
321  nc = ip[1];
322  if (n > (nc << 2)) {
323  nc = n >> 2;
324  makect(nc, ip, w + nw);
325  }
326  if (isgn >= 0) {
327  if (n > 4) {
328  cftfsub(n, a, ip, nw, w);
329  rftfsub(n, a, nc, w + nw);
330  } else if (n == 4) {
331  cftfsub(n, a, ip, nw, w);
332  }
333  xi = a[0] - a[1];
334  a[0] += a[1];
335  a[1] = xi;
336  } else {
337  a[1] = 0.5 * (a[0] - a[1]);
338  a[0] -= a[1];
339  if (n > 4) {
340  rftbsub(n, a, nc, w + nw);
341  cftbsub(n, a, ip, nw, w);
342  } else if (n == 4) {
343  cftbsub(n, a, ip, nw, w);
344  }
345  }
346 }
347 
348 
349 extern "C" void ddct(int n, int isgn, double *a, int *ip, double *w)
350 {
351  void makewt(int nw, int *ip, double *w);
352  void makect(int nc, int *ip, double *c);
353  void cftfsub(int n, double *a, int *ip, int nw, double *w);
354  void cftbsub(int n, double *a, int *ip, int nw, double *w);
355  void rftfsub(int n, double *a, int nc, double *c);
356  void rftbsub(int n, double *a, int nc, double *c);
357  void dctsub(int n, double *a, int nc, double *c);
358  int j, nw, nc;
359  double xr;
360 
361  nw = ip[0];
362  if (n > (nw << 2)) {
363  nw = n >> 2;
364  makewt(nw, ip, w);
365  }
366  nc = ip[1];
367  if (n > nc) {
368  nc = n;
369  makect(nc, ip, w + nw);
370  }
371  if (isgn < 0) {
372  xr = a[n - 1];
373  for (j = n - 2; j >= 2; j -= 2) {
374  a[j + 1] = a[j] - a[j - 1];
375  a[j] += a[j - 1];
376  }
377  a[1] = a[0] - xr;
378  a[0] += xr;
379  if (n > 4) {
380  rftbsub(n, a, nc, w + nw);
381  cftbsub(n, a, ip, nw, w);
382  } else if (n == 4) {
383  cftbsub(n, a, ip, nw, w);
384  }
385  }
386  dctsub(n, a, nc, w + nw);
387  if (isgn >= 0) {
388  if (n > 4) {
389  cftfsub(n, a, ip, nw, w);
390  rftfsub(n, a, nc, w + nw);
391  } else if (n == 4) {
392  cftfsub(n, a, ip, nw, w);
393  }
394  xr = a[0] - a[1];
395  a[0] += a[1];
396  for (j = 2; j < n; j += 2) {
397  a[j - 1] = a[j] - a[j + 1];
398  a[j] += a[j + 1];
399  }
400  a[n - 1] = xr;
401  }
402 }
403 
404 
405 extern "C" void ddst(int n, int isgn, double *a, int *ip, double *w)
406 {
407  void makewt(int nw, int *ip, double *w);
408  void makect(int nc, int *ip, double *c);
409  void cftfsub(int n, double *a, int *ip, int nw, double *w);
410  void cftbsub(int n, double *a, int *ip, int nw, double *w);
411  void rftfsub(int n, double *a, int nc, double *c);
412  void rftbsub(int n, double *a, int nc, double *c);
413  void dstsub(int n, double *a, int nc, double *c);
414  int j, nw, nc;
415  double xr;
416 
417  nw = ip[0];
418  if (n > (nw << 2)) {
419  nw = n >> 2;
420  makewt(nw, ip, w);
421  }
422  nc = ip[1];
423  if (n > nc) {
424  nc = n;
425  makect(nc, ip, w + nw);
426  }
427  if (isgn < 0) {
428  xr = a[n - 1];
429  for (j = n - 2; j >= 2; j -= 2) {
430  a[j + 1] = -a[j] - a[j - 1];
431  a[j] -= a[j - 1];
432  }
433  a[1] = a[0] + xr;
434  a[0] -= xr;
435  if (n > 4) {
436  rftbsub(n, a, nc, w + nw);
437  cftbsub(n, a, ip, nw, w);
438  } else if (n == 4) {
439  cftbsub(n, a, ip, nw, w);
440  }
441  }
442  dstsub(n, a, nc, w + nw);
443  if (isgn >= 0) {
444  if (n > 4) {
445  cftfsub(n, a, ip, nw, w);
446  rftfsub(n, a, nc, w + nw);
447  } else if (n == 4) {
448  cftfsub(n, a, ip, nw, w);
449  }
450  xr = a[0] - a[1];
451  a[0] += a[1];
452  for (j = 2; j < n; j += 2) {
453  a[j - 1] = -a[j] - a[j + 1];
454  a[j] -= a[j + 1];
455  }
456  a[n - 1] = -xr;
457  }
458 }
459 
460 
461 extern "C" void dfct(int n, double *a, double *t, int *ip, double *w)
462 {
463  void makewt(int nw, int *ip, double *w);
464  void makect(int nc, int *ip, double *c);
465  void cftfsub(int n, double *a, int *ip, int nw, double *w);
466  void rftfsub(int n, double *a, int nc, double *c);
467  void dctsub(int n, double *a, int nc, double *c);
468  int j, k, l, m, mh, nw, nc;
469  double xr, xi, yr, yi;
470 
471  nw = ip[0];
472  if (n > (nw << 3)) {
473  nw = n >> 3;
474  makewt(nw, ip, w);
475  }
476  nc = ip[1];
477  if (n > (nc << 1)) {
478  nc = n >> 1;
479  makect(nc, ip, w + nw);
480  }
481  m = n >> 1;
482  yi = a[m];
483  xi = a[0] + a[n];
484  a[0] -= a[n];
485  t[0] = xi - yi;
486  t[m] = xi + yi;
487  if (n > 2) {
488  mh = m >> 1;
489  for (j = 1; j < mh; j++) {
490  k = m - j;
491  xr = a[j] - a[n - j];
492  xi = a[j] + a[n - j];
493  yr = a[k] - a[n - k];
494  yi = a[k] + a[n - k];
495  a[j] = xr;
496  a[k] = yr;
497  t[j] = xi - yi;
498  t[k] = xi + yi;
499  }
500  t[mh] = a[mh] + a[n - mh];
501  a[mh] -= a[n - mh];
502  dctsub(m, a, nc, w + nw);
503  if (m > 4) {
504  cftfsub(m, a, ip, nw, w);
505  rftfsub(m, a, nc, w + nw);
506  } else if (m == 4) {
507  cftfsub(m, a, ip, nw, w);
508  }
509  a[n - 1] = a[0] - a[1];
510  a[1] = a[0] + a[1];
511  for (j = m - 2; j >= 2; j -= 2) {
512  a[2 * j + 1] = a[j] + a[j + 1];
513  a[2 * j - 1] = a[j] - a[j + 1];
514  }
515  l = 2;
516  m = mh;
517  while (m >= 2) {
518  dctsub(m, t, nc, w + nw);
519  if (m > 4) {
520  cftfsub(m, t, ip, nw, w);
521  rftfsub(m, t, nc, w + nw);
522  } else if (m == 4) {
523  cftfsub(m, t, ip, nw, w);
524  }
525  a[n - l] = t[0] - t[1];
526  a[l] = t[0] + t[1];
527  k = 0;
528  for (j = 2; j < m; j += 2) {
529  k += l << 2;
530  a[k - l] = t[j] - t[j + 1];
531  a[k + l] = t[j] + t[j + 1];
532  }
533  l <<= 1;
534  mh = m >> 1;
535  for (j = 0; j < mh; j++) {
536  k = m - j;
537  t[j] = t[m + k] - t[m + j];
538  t[k] = t[m + k] + t[m + j];
539  }
540  t[mh] = t[m + mh];
541  m = mh;
542  }
543  a[l] = t[0];
544  a[n] = t[2] - t[1];
545  a[0] = t[2] + t[1];
546  } else {
547  a[1] = a[0];
548  a[2] = t[0];
549  a[0] = t[1];
550  }
551 }
552 
553 
554 extern "C" void dfst(int n, double *a, double *t, int *ip, double *w)
555 {
556  void makewt(int nw, int *ip, double *w);
557  void makect(int nc, int *ip, double *c);
558  void cftfsub(int n, double *a, int *ip, int nw, double *w);
559  void rftfsub(int n, double *a, int nc, double *c);
560  void dstsub(int n, double *a, int nc, double *c);
561  int j, k, l, m, mh, nw, nc;
562  double xr, xi, yr, yi;
563 
564  nw = ip[0];
565  if (n > (nw << 3)) {
566  nw = n >> 3;
567  makewt(nw, ip, w);
568  }
569  nc = ip[1];
570  if (n > (nc << 1)) {
571  nc = n >> 1;
572  makect(nc, ip, w + nw);
573  }
574  if (n > 2) {
575  m = n >> 1;
576  mh = m >> 1;
577  for (j = 1; j < mh; j++) {
578  k = m - j;
579  xr = a[j] + a[n - j];
580  xi = a[j] - a[n - j];
581  yr = a[k] + a[n - k];
582  yi = a[k] - a[n - k];
583  a[j] = xr;
584  a[k] = yr;
585  t[j] = xi + yi;
586  t[k] = xi - yi;
587  }
588  t[0] = a[mh] - a[n - mh];
589  a[mh] += a[n - mh];
590  a[0] = a[m];
591  dstsub(m, a, nc, w + nw);
592  if (m > 4) {
593  cftfsub(m, a, ip, nw, w);
594  rftfsub(m, a, nc, w + nw);
595  } else if (m == 4) {
596  cftfsub(m, a, ip, nw, w);
597  }
598  a[n - 1] = a[1] - a[0];
599  a[1] = a[0] + a[1];
600  for (j = m - 2; j >= 2; j -= 2) {
601  a[2 * j + 1] = a[j] - a[j + 1];
602  a[2 * j - 1] = -a[j] - a[j + 1];
603  }
604  l = 2;
605  m = mh;
606  while (m >= 2) {
607  dstsub(m, t, nc, w + nw);
608  if (m > 4) {
609  cftfsub(m, t, ip, nw, w);
610  rftfsub(m, t, nc, w + nw);
611  } else if (m == 4) {
612  cftfsub(m, t, ip, nw, w);
613  }
614  a[n - l] = t[1] - t[0];
615  a[l] = t[0] + t[1];
616  k = 0;
617  for (j = 2; j < m; j += 2) {
618  k += l << 2;
619  a[k - l] = -t[j] - t[j + 1];
620  a[k + l] = t[j] - t[j + 1];
621  }
622  l <<= 1;
623  mh = m >> 1;
624  for (j = 1; j < mh; j++) {
625  k = m - j;
626  t[j] = t[m + k] + t[m + j];
627  t[k] = t[m + k] - t[m + j];
628  }
629  t[0] = t[m + mh];
630  m = mh;
631  }
632  a[l] = t[0];
633  }
634  a[0] = 0;
635 }
636 
637 
638 /* -------- initializing routines -------- */
639 
640 
641 #include <math.h>
642 
643 extern "C" void makewt(int nw, int *ip, double *w)
644 {
645  void makeipt(int nw, int *ip);
646  int j, nwh, nw0, nw1;
647  double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
648 
649  ip[0] = nw;
650  ip[1] = 1;
651  if (nw > 2) {
652  nwh = nw >> 1;
653  delta = atan(1.0) / nwh;
654  wn4r = cos(delta * nwh);
655  w[0] = 1;
656  w[1] = wn4r;
657  if (nwh == 4) {
658  w[2] = cos(delta * 2);
659  w[3] = sin(delta * 2);
660  } else if (nwh > 4) {
661  makeipt(nw, ip);
662  w[2] = 0.5 / cos(delta * 2);
663  w[3] = 0.5 / cos(delta * 6);
664  for (j = 4; j < nwh; j += 4) {
665  w[j] = cos(delta * j);
666  w[j + 1] = sin(delta * j);
667  w[j + 2] = cos(3 * delta * j);
668  w[j + 3] = -sin(3 * delta * j);
669  }
670  }
671  nw0 = 0;
672  while (nwh > 2) {
673  nw1 = nw0 + nwh;
674  nwh >>= 1;
675  w[nw1] = 1;
676  w[nw1 + 1] = wn4r;
677  if (nwh == 4) {
678  wk1r = w[nw0 + 4];
679  wk1i = w[nw0 + 5];
680  w[nw1 + 2] = wk1r;
681  w[nw1 + 3] = wk1i;
682  } else if (nwh > 4) {
683  wk1r = w[nw0 + 4];
684  wk3r = w[nw0 + 6];
685  w[nw1 + 2] = 0.5 / wk1r;
686  w[nw1 + 3] = 0.5 / wk3r;
687  for (j = 4; j < nwh; j += 4) {
688  wk1r = w[nw0 + 2 * j];
689  wk1i = w[nw0 + 2 * j + 1];
690  wk3r = w[nw0 + 2 * j + 2];
691  wk3i = w[nw0 + 2 * j + 3];
692  w[nw1 + j] = wk1r;
693  w[nw1 + j + 1] = wk1i;
694  w[nw1 + j + 2] = wk3r;
695  w[nw1 + j + 3] = wk3i;
696  }
697  }
698  nw0 = nw1;
699  }
700  }
701 }
702 
703 
704 extern "C" void makeipt(int nw, int *ip)
705 {
706  int j, l, m, m2, p, q;
707 
708  ip[2] = 0;
709  ip[3] = 16;
710  m = 2;
711  for (l = nw; l > 32; l >>= 2) {
712  m2 = m << 1;
713  q = m2 << 3;
714  for (j = m; j < m2; j++) {
715  p = ip[j] << 2;
716  ip[m + j] = p;
717  ip[m2 + j] = p + q;
718  }
719  m = m2;
720  }
721 }
722 
723 
724 extern "C" void makect(int nc, int *ip, double *c)
725 {
726  int j, nch;
727  double delta;
728 
729  ip[1] = nc;
730  if (nc > 1) {
731  nch = nc >> 1;
732  delta = atan(1.0) / nch;
733  c[0] = cos(delta * nch);
734  c[nch] = 0.5 * c[0];
735  for (j = 1; j < nch; j++) {
736  c[j] = 0.5 * cos(delta * j);
737  c[nc - j] = 0.5 * sin(delta * j);
738  }
739  }
740 }
741 
742 
743 /* -------- child routines -------- */
744 
745 
746 #ifdef USE_CDFT_PTHREADS
747 #define USE_CDFT_THREADS
748 #ifndef CDFT_THREADS_BEGIN_N
749 #define CDFT_THREADS_BEGIN_N 8192
750 #endif
751 #ifndef CDFT_4THREADS_BEGIN_N
752 #define CDFT_4THREADS_BEGIN_N 65536
753 #endif
754 #include <pthread.h>
755 #include <stdio.h>
756 #include <stdlib.h>
757 #define cdft_thread_t pthread_t
758 #define cdft_thread_create(thp,func,argp) { \
759  if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
760  fprintf(stderr, "cdft thread error\n"); \
761  exit(1); \
762  } \
763 }
764 #define cdft_thread_wait(th) { \
765  if (pthread_join(th, NULL) != 0) { \
766  fprintf(stderr, "cdft thread error\n"); \
767  exit(1); \
768  } \
769 }
770 #endif /* USE_CDFT_PTHREADS */
771 
772 
773 #ifdef USE_CDFT_WINTHREADS
774 #define USE_CDFT_THREADS
775 #ifndef CDFT_THREADS_BEGIN_N
776 #define CDFT_THREADS_BEGIN_N 32768
777 #endif
778 #ifndef CDFT_4THREADS_BEGIN_N
779 #define CDFT_4THREADS_BEGIN_N 524288
780 #endif
781 #include <windows.h>
782 #include <stdio.h>
783 #include <stdlib.h>
784 #define cdft_thread_t HANDLE
785 #define cdft_thread_create(thp,func,argp) { \
786  DWORD thid; \
787  *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
788  if (*(thp) == 0) { \
789  fprintf(stderr, "cdft thread error\n"); \
790  exit(1); \
791  } \
792 }
793 #define cdft_thread_wait(th) { \
794  WaitForSingleObject(th, INFINITE); \
795  CloseHandle(th); \
796 }
797 #endif /* USE_CDFT_WINTHREADS */
798 
799 
800 extern "C" void cftfsub(int n, double *a, int *ip, int nw, double *w)
801 {
802  void bitrv2(int n, int *ip, double *a);
803  void bitrv216(double *a);
804  void bitrv208(double *a);
805  void cftf1st(int n, double *a, double *w);
806  void cftrec4(int n, double *a, int nw, double *w);
807  void cftleaf(int n, int isplt, double *a, int nw, double *w);
808  void cftfx41(int n, double *a, int nw, double *w);
809  void cftf161(double *a, double *w);
810  void cftf081(double *a, double *w);
811  void cftf040(double *a);
812  void cftx020(double *a);
813 #ifdef USE_CDFT_THREADS
814  void cftrec4_th(int n, double *a, int nw, double *w);
815 #endif /* USE_CDFT_THREADS */
816 
817  if (n > 8) {
818  if (n > 32) {
819  cftf1st(n, a, &w[nw - (n >> 2)]);
820 #ifdef USE_CDFT_THREADS
821  if (n > CDFT_THREADS_BEGIN_N) {
822  cftrec4_th(n, a, nw, w);
823  } else
824 #endif /* USE_CDFT_THREADS */
825  if (n > 512) {
826  cftrec4(n, a, nw, w);
827  } else if (n > 128) {
828  cftleaf(n, 1, a, nw, w);
829  } else {
830  cftfx41(n, a, nw, w);
831  }
832  bitrv2(n, ip, a);
833  } else if (n == 32) {
834  cftf161(a, &w[nw - 8]);
835  bitrv216(a);
836  } else {
837  cftf081(a, w);
838  bitrv208(a);
839  }
840  } else if (n == 8) {
841  cftf040(a);
842  } else if (n == 4) {
843  cftx020(a);
844  }
845 }
846 
847 
848 extern "C" void cftbsub(int n, double *a, int *ip, int nw, double *w)
849 {
850  void bitrv2conj(int n, int *ip, double *a);
851  void bitrv216neg(double *a);
852  void bitrv208neg(double *a);
853  void cftb1st(int n, double *a, double *w);
854  void cftrec4(int n, double *a, int nw, double *w);
855  void cftleaf(int n, int isplt, double *a, int nw, double *w);
856  void cftfx41(int n, double *a, int nw, double *w);
857  void cftf161(double *a, double *w);
858  void cftf081(double *a, double *w);
859  void cftb040(double *a);
860  void cftx020(double *a);
861 #ifdef USE_CDFT_THREADS
862  void cftrec4_th(int n, double *a, int nw, double *w);
863 #endif /* USE_CDFT_THREADS */
864 
865  if (n > 8) {
866  if (n > 32) {
867  cftb1st(n, a, &w[nw - (n >> 2)]);
868 #ifdef USE_CDFT_THREADS
869  if (n > CDFT_THREADS_BEGIN_N) {
870  cftrec4_th(n, a, nw, w);
871  } else
872 #endif /* USE_CDFT_THREADS */
873  if (n > 512) {
874  cftrec4(n, a, nw, w);
875  } else if (n > 128) {
876  cftleaf(n, 1, a, nw, w);
877  } else {
878  cftfx41(n, a, nw, w);
879  }
880  bitrv2conj(n, ip, a);
881  } else if (n == 32) {
882  cftf161(a, &w[nw - 8]);
883  bitrv216neg(a);
884  } else {
885  cftf081(a, w);
886  bitrv208neg(a);
887  }
888  } else if (n == 8) {
889  cftb040(a);
890  } else if (n == 4) {
891  cftx020(a);
892  }
893 }
894 
895 
896 extern "C" void bitrv2(int n, int *ip, double *a)
897 {
898  int j, j1, k, k1, l, m, nh, nm;
899  double xr, xi, yr, yi;
900 
901  m = 1;
902  for (l = n >> 2; l > 8; l >>= 2) {
903  m <<= 1;
904  }
905  nh = n >> 1;
906  nm = 4 * m;
907  if (l == 8) {
908  for (k = 0; k < m; k++) {
909  for (j = 0; j < k; j++) {
910  j1 = 4 * j + 2 * ip[m + k];
911  k1 = 4 * k + 2 * ip[m + j];
912  xr = a[j1];
913  xi = a[j1 + 1];
914  yr = a[k1];
915  yi = a[k1 + 1];
916  a[j1] = yr;
917  a[j1 + 1] = yi;
918  a[k1] = xr;
919  a[k1 + 1] = xi;
920  j1 += nm;
921  k1 += 2 * nm;
922  xr = a[j1];
923  xi = a[j1 + 1];
924  yr = a[k1];
925  yi = a[k1 + 1];
926  a[j1] = yr;
927  a[j1 + 1] = yi;
928  a[k1] = xr;
929  a[k1 + 1] = xi;
930  j1 += nm;
931  k1 -= nm;
932  xr = a[j1];
933  xi = a[j1 + 1];
934  yr = a[k1];
935  yi = a[k1 + 1];
936  a[j1] = yr;
937  a[j1 + 1] = yi;
938  a[k1] = xr;
939  a[k1 + 1] = xi;
940  j1 += nm;
941  k1 += 2 * nm;
942  xr = a[j1];
943  xi = a[j1 + 1];
944  yr = a[k1];
945  yi = a[k1 + 1];
946  a[j1] = yr;
947  a[j1 + 1] = yi;
948  a[k1] = xr;
949  a[k1 + 1] = xi;
950  j1 += nh;
951  k1 += 2;
952  xr = a[j1];
953  xi = a[j1 + 1];
954  yr = a[k1];
955  yi = a[k1 + 1];
956  a[j1] = yr;
957  a[j1 + 1] = yi;
958  a[k1] = xr;
959  a[k1 + 1] = xi;
960  j1 -= nm;
961  k1 -= 2 * nm;
962  xr = a[j1];
963  xi = a[j1 + 1];
964  yr = a[k1];
965  yi = a[k1 + 1];
966  a[j1] = yr;
967  a[j1 + 1] = yi;
968  a[k1] = xr;
969  a[k1 + 1] = xi;
970  j1 -= nm;
971  k1 += nm;
972  xr = a[j1];
973  xi = a[j1 + 1];
974  yr = a[k1];
975  yi = a[k1 + 1];
976  a[j1] = yr;
977  a[j1 + 1] = yi;
978  a[k1] = xr;
979  a[k1 + 1] = xi;
980  j1 -= nm;
981  k1 -= 2 * nm;
982  xr = a[j1];
983  xi = a[j1 + 1];
984  yr = a[k1];
985  yi = a[k1 + 1];
986  a[j1] = yr;
987  a[j1 + 1] = yi;
988  a[k1] = xr;
989  a[k1 + 1] = xi;
990  j1 += 2;
991  k1 += nh;
992  xr = a[j1];
993  xi = a[j1 + 1];
994  yr = a[k1];
995  yi = a[k1 + 1];
996  a[j1] = yr;
997  a[j1 + 1] = yi;
998  a[k1] = xr;
999  a[k1 + 1] = xi;
1000  j1 += nm;
1001  k1 += 2 * nm;
1002  xr = a[j1];
1003  xi = a[j1 + 1];
1004  yr = a[k1];
1005  yi = a[k1 + 1];
1006  a[j1] = yr;
1007  a[j1 + 1] = yi;
1008  a[k1] = xr;
1009  a[k1 + 1] = xi;
1010  j1 += nm;
1011  k1 -= nm;
1012  xr = a[j1];
1013  xi = a[j1 + 1];
1014  yr = a[k1];
1015  yi = a[k1 + 1];
1016  a[j1] = yr;
1017  a[j1 + 1] = yi;
1018  a[k1] = xr;
1019  a[k1 + 1] = xi;
1020  j1 += nm;
1021  k1 += 2 * nm;
1022  xr = a[j1];
1023  xi = a[j1 + 1];
1024  yr = a[k1];
1025  yi = a[k1 + 1];
1026  a[j1] = yr;
1027  a[j1 + 1] = yi;
1028  a[k1] = xr;
1029  a[k1 + 1] = xi;
1030  j1 -= nh;
1031  k1 -= 2;
1032  xr = a[j1];
1033  xi = a[j1 + 1];
1034  yr = a[k1];
1035  yi = a[k1 + 1];
1036  a[j1] = yr;
1037  a[j1 + 1] = yi;
1038  a[k1] = xr;
1039  a[k1 + 1] = xi;
1040  j1 -= nm;
1041  k1 -= 2 * nm;
1042  xr = a[j1];
1043  xi = a[j1 + 1];
1044  yr = a[k1];
1045  yi = a[k1 + 1];
1046  a[j1] = yr;
1047  a[j1 + 1] = yi;
1048  a[k1] = xr;
1049  a[k1 + 1] = xi;
1050  j1 -= nm;
1051  k1 += nm;
1052  xr = a[j1];
1053  xi = a[j1 + 1];
1054  yr = a[k1];
1055  yi = a[k1 + 1];
1056  a[j1] = yr;
1057  a[j1 + 1] = yi;
1058  a[k1] = xr;
1059  a[k1 + 1] = xi;
1060  j1 -= nm;
1061  k1 -= 2 * nm;
1062  xr = a[j1];
1063  xi = a[j1 + 1];
1064  yr = a[k1];
1065  yi = a[k1 + 1];
1066  a[j1] = yr;
1067  a[j1 + 1] = yi;
1068  a[k1] = xr;
1069  a[k1 + 1] = xi;
1070  }
1071  k1 = 4 * k + 2 * ip[m + k];
1072  j1 = k1 + 2;
1073  k1 += nh;
1074  xr = a[j1];
1075  xi = a[j1 + 1];
1076  yr = a[k1];
1077  yi = a[k1 + 1];
1078  a[j1] = yr;
1079  a[j1 + 1] = yi;
1080  a[k1] = xr;
1081  a[k1 + 1] = xi;
1082  j1 += nm;
1083  k1 += 2 * nm;
1084  xr = a[j1];
1085  xi = a[j1 + 1];
1086  yr = a[k1];
1087  yi = a[k1 + 1];
1088  a[j1] = yr;
1089  a[j1 + 1] = yi;
1090  a[k1] = xr;
1091  a[k1 + 1] = xi;
1092  j1 += nm;
1093  k1 -= nm;
1094  xr = a[j1];
1095  xi = a[j1 + 1];
1096  yr = a[k1];
1097  yi = a[k1 + 1];
1098  a[j1] = yr;
1099  a[j1 + 1] = yi;
1100  a[k1] = xr;
1101  a[k1 + 1] = xi;
1102  j1 -= 2;
1103  k1 -= nh;
1104  xr = a[j1];
1105  xi = a[j1 + 1];
1106  yr = a[k1];
1107  yi = a[k1 + 1];
1108  a[j1] = yr;
1109  a[j1 + 1] = yi;
1110  a[k1] = xr;
1111  a[k1 + 1] = xi;
1112  j1 += nh + 2;
1113  k1 += nh + 2;
1114  xr = a[j1];
1115  xi = a[j1 + 1];
1116  yr = a[k1];
1117  yi = a[k1 + 1];
1118  a[j1] = yr;
1119  a[j1 + 1] = yi;
1120  a[k1] = xr;
1121  a[k1 + 1] = xi;
1122  j1 -= nh - nm;
1123  k1 += 2 * nm - 2;
1124  xr = a[j1];
1125  xi = a[j1 + 1];
1126  yr = a[k1];
1127  yi = a[k1 + 1];
1128  a[j1] = yr;
1129  a[j1 + 1] = yi;
1130  a[k1] = xr;
1131  a[k1 + 1] = xi;
1132  }
1133  } else {
1134  for (k = 0; k < m; k++) {
1135  for (j = 0; j < k; j++) {
1136  j1 = 4 * j + ip[m + k];
1137  k1 = 4 * k + ip[m + j];
1138  xr = a[j1];
1139  xi = a[j1 + 1];
1140  yr = a[k1];
1141  yi = a[k1 + 1];
1142  a[j1] = yr;
1143  a[j1 + 1] = yi;
1144  a[k1] = xr;
1145  a[k1 + 1] = xi;
1146  j1 += nm;
1147  k1 += nm;
1148  xr = a[j1];
1149  xi = a[j1 + 1];
1150  yr = a[k1];
1151  yi = a[k1 + 1];
1152  a[j1] = yr;
1153  a[j1 + 1] = yi;
1154  a[k1] = xr;
1155  a[k1 + 1] = xi;
1156  j1 += nh;
1157  k1 += 2;
1158  xr = a[j1];
1159  xi = a[j1 + 1];
1160  yr = a[k1];
1161  yi = a[k1 + 1];
1162  a[j1] = yr;
1163  a[j1 + 1] = yi;
1164  a[k1] = xr;
1165  a[k1 + 1] = xi;
1166  j1 -= nm;
1167  k1 -= nm;
1168  xr = a[j1];
1169  xi = a[j1 + 1];
1170  yr = a[k1];
1171  yi = a[k1 + 1];
1172  a[j1] = yr;
1173  a[j1 + 1] = yi;
1174  a[k1] = xr;
1175  a[k1 + 1] = xi;
1176  j1 += 2;
1177  k1 += nh;
1178  xr = a[j1];
1179  xi = a[j1 + 1];
1180  yr = a[k1];
1181  yi = a[k1 + 1];
1182  a[j1] = yr;
1183  a[j1 + 1] = yi;
1184  a[k1] = xr;
1185  a[k1 + 1] = xi;
1186  j1 += nm;
1187  k1 += nm;
1188  xr = a[j1];
1189  xi = a[j1 + 1];
1190  yr = a[k1];
1191  yi = a[k1 + 1];
1192  a[j1] = yr;
1193  a[j1 + 1] = yi;
1194  a[k1] = xr;
1195  a[k1 + 1] = xi;
1196  j1 -= nh;
1197  k1 -= 2;
1198  xr = a[j1];
1199  xi = a[j1 + 1];
1200  yr = a[k1];
1201  yi = a[k1 + 1];
1202  a[j1] = yr;
1203  a[j1 + 1] = yi;
1204  a[k1] = xr;
1205  a[k1 + 1] = xi;
1206  j1 -= nm;
1207  k1 -= nm;
1208  xr = a[j1];
1209  xi = a[j1 + 1];
1210  yr = a[k1];
1211  yi = a[k1 + 1];
1212  a[j1] = yr;
1213  a[j1 + 1] = yi;
1214  a[k1] = xr;
1215  a[k1 + 1] = xi;
1216  }
1217  k1 = 4 * k + ip[m + k];
1218  j1 = k1 + 2;
1219  k1 += nh;
1220  xr = a[j1];
1221  xi = a[j1 + 1];
1222  yr = a[k1];
1223  yi = a[k1 + 1];
1224  a[j1] = yr;
1225  a[j1 + 1] = yi;
1226  a[k1] = xr;
1227  a[k1 + 1] = xi;
1228  j1 += nm;
1229  k1 += nm;
1230  xr = a[j1];
1231  xi = a[j1 + 1];
1232  yr = a[k1];
1233  yi = a[k1 + 1];
1234  a[j1] = yr;
1235  a[j1 + 1] = yi;
1236  a[k1] = xr;
1237  a[k1 + 1] = xi;
1238  }
1239  }
1240 }
1241 
1242 
1243 extern "C" void bitrv2conj(int n, int *ip, double *a)
1244 {
1245  int j, j1, k, k1, l, m, nh, nm;
1246  double xr, xi, yr, yi;
1247 
1248  m = 1;
1249  for (l = n >> 2; l > 8; l >>= 2) {
1250  m <<= 1;
1251  }
1252  nh = n >> 1;
1253  nm = 4 * m;
1254  if (l == 8) {
1255  for (k = 0; k < m; k++) {
1256  for (j = 0; j < k; j++) {
1257  j1 = 4 * j + 2 * ip[m + k];
1258  k1 = 4 * k + 2 * ip[m + j];
1259  xr = a[j1];
1260  xi = -a[j1 + 1];
1261  yr = a[k1];
1262  yi = -a[k1 + 1];
1263  a[j1] = yr;
1264  a[j1 + 1] = yi;
1265  a[k1] = xr;
1266  a[k1 + 1] = xi;
1267  j1 += nm;
1268  k1 += 2 * nm;
1269  xr = a[j1];
1270  xi = -a[j1 + 1];
1271  yr = a[k1];
1272  yi = -a[k1 + 1];
1273  a[j1] = yr;
1274  a[j1 + 1] = yi;
1275  a[k1] = xr;
1276  a[k1 + 1] = xi;
1277  j1 += nm;
1278  k1 -= nm;
1279  xr = a[j1];
1280  xi = -a[j1 + 1];
1281  yr = a[k1];
1282  yi = -a[k1 + 1];
1283  a[j1] = yr;
1284  a[j1 + 1] = yi;
1285  a[k1] = xr;
1286  a[k1 + 1] = xi;
1287  j1 += nm;
1288  k1 += 2 * nm;
1289  xr = a[j1];
1290  xi = -a[j1 + 1];
1291  yr = a[k1];
1292  yi = -a[k1 + 1];
1293  a[j1] = yr;
1294  a[j1 + 1] = yi;
1295  a[k1] = xr;
1296  a[k1 + 1] = xi;
1297  j1 += nh;
1298  k1 += 2;
1299  xr = a[j1];
1300  xi = -a[j1 + 1];
1301  yr = a[k1];
1302  yi = -a[k1 + 1];
1303  a[j1] = yr;
1304  a[j1 + 1] = yi;
1305  a[k1] = xr;
1306  a[k1 + 1] = xi;
1307  j1 -= nm;
1308  k1 -= 2 * nm;
1309  xr = a[j1];
1310  xi = -a[j1 + 1];
1311  yr = a[k1];
1312  yi = -a[k1 + 1];
1313  a[j1] = yr;
1314  a[j1 + 1] = yi;
1315  a[k1] = xr;
1316  a[k1 + 1] = xi;
1317  j1 -= nm;
1318  k1 += nm;
1319  xr = a[j1];
1320  xi = -a[j1 + 1];
1321  yr = a[k1];
1322  yi = -a[k1 + 1];
1323  a[j1] = yr;
1324  a[j1 + 1] = yi;
1325  a[k1] = xr;
1326  a[k1 + 1] = xi;
1327  j1 -= nm;
1328  k1 -= 2 * nm;
1329  xr = a[j1];
1330  xi = -a[j1 + 1];
1331  yr = a[k1];
1332  yi = -a[k1 + 1];
1333  a[j1] = yr;
1334  a[j1 + 1] = yi;
1335  a[k1] = xr;
1336  a[k1 + 1] = xi;
1337  j1 += 2;
1338  k1 += nh;
1339  xr = a[j1];
1340  xi = -a[j1 + 1];
1341  yr = a[k1];
1342  yi = -a[k1 + 1];
1343  a[j1] = yr;
1344  a[j1 + 1] = yi;
1345  a[k1] = xr;
1346  a[k1 + 1] = xi;
1347  j1 += nm;
1348  k1 += 2 * nm;
1349  xr = a[j1];
1350  xi = -a[j1 + 1];
1351  yr = a[k1];
1352  yi = -a[k1 + 1];
1353  a[j1] = yr;
1354  a[j1 + 1] = yi;
1355  a[k1] = xr;
1356  a[k1 + 1] = xi;
1357  j1 += nm;
1358  k1 -= nm;
1359  xr = a[j1];
1360  xi = -a[j1 + 1];
1361  yr = a[k1];
1362  yi = -a[k1 + 1];
1363  a[j1] = yr;
1364  a[j1 + 1] = yi;
1365  a[k1] = xr;
1366  a[k1 + 1] = xi;
1367  j1 += nm;
1368  k1 += 2 * nm;
1369  xr = a[j1];
1370  xi = -a[j1 + 1];
1371  yr = a[k1];
1372  yi = -a[k1 + 1];
1373  a[j1] = yr;
1374  a[j1 + 1] = yi;
1375  a[k1] = xr;
1376  a[k1 + 1] = xi;
1377  j1 -= nh;
1378  k1 -= 2;
1379  xr = a[j1];
1380  xi = -a[j1 + 1];
1381  yr = a[k1];
1382  yi = -a[k1 + 1];
1383  a[j1] = yr;
1384  a[j1 + 1] = yi;
1385  a[k1] = xr;
1386  a[k1 + 1] = xi;
1387  j1 -= nm;
1388  k1 -= 2 * nm;
1389  xr = a[j1];
1390  xi = -a[j1 + 1];
1391  yr = a[k1];
1392  yi = -a[k1 + 1];
1393  a[j1] = yr;
1394  a[j1 + 1] = yi;
1395  a[k1] = xr;
1396  a[k1 + 1] = xi;
1397  j1 -= nm;
1398  k1 += nm;
1399  xr = a[j1];
1400  xi = -a[j1 + 1];
1401  yr = a[k1];
1402  yi = -a[k1 + 1];
1403  a[j1] = yr;
1404  a[j1 + 1] = yi;
1405  a[k1] = xr;
1406  a[k1 + 1] = xi;
1407  j1 -= nm;
1408  k1 -= 2 * nm;
1409  xr = a[j1];
1410  xi = -a[j1 + 1];
1411  yr = a[k1];
1412  yi = -a[k1 + 1];
1413  a[j1] = yr;
1414  a[j1 + 1] = yi;
1415  a[k1] = xr;
1416  a[k1 + 1] = xi;
1417  }
1418  k1 = 4 * k + 2 * ip[m + k];
1419  j1 = k1 + 2;
1420  k1 += nh;
1421  a[j1 - 1] = -a[j1 - 1];
1422  xr = a[j1];
1423  xi = -a[j1 + 1];
1424  yr = a[k1];
1425  yi = -a[k1 + 1];
1426  a[j1] = yr;
1427  a[j1 + 1] = yi;
1428  a[k1] = xr;
1429  a[k1 + 1] = xi;
1430  a[k1 + 3] = -a[k1 + 3];
1431  j1 += nm;
1432  k1 += 2 * nm;
1433  xr = a[j1];
1434  xi = -a[j1 + 1];
1435  yr = a[k1];
1436  yi = -a[k1 + 1];
1437  a[j1] = yr;
1438  a[j1 + 1] = yi;
1439  a[k1] = xr;
1440  a[k1 + 1] = xi;
1441  j1 += nm;
1442  k1 -= nm;
1443  xr = a[j1];
1444  xi = -a[j1 + 1];
1445  yr = a[k1];
1446  yi = -a[k1 + 1];
1447  a[j1] = yr;
1448  a[j1 + 1] = yi;
1449  a[k1] = xr;
1450  a[k1 + 1] = xi;
1451  j1 -= 2;
1452  k1 -= nh;
1453  xr = a[j1];
1454  xi = -a[j1 + 1];
1455  yr = a[k1];
1456  yi = -a[k1 + 1];
1457  a[j1] = yr;
1458  a[j1 + 1] = yi;
1459  a[k1] = xr;
1460  a[k1 + 1] = xi;
1461  j1 += nh + 2;
1462  k1 += nh + 2;
1463  xr = a[j1];
1464  xi = -a[j1 + 1];
1465  yr = a[k1];
1466  yi = -a[k1 + 1];
1467  a[j1] = yr;
1468  a[j1 + 1] = yi;
1469  a[k1] = xr;
1470  a[k1 + 1] = xi;
1471  j1 -= nh - nm;
1472  k1 += 2 * nm - 2;
1473  a[j1 - 1] = -a[j1 - 1];
1474  xr = a[j1];
1475  xi = -a[j1 + 1];
1476  yr = a[k1];
1477  yi = -a[k1 + 1];
1478  a[j1] = yr;
1479  a[j1 + 1] = yi;
1480  a[k1] = xr;
1481  a[k1 + 1] = xi;
1482  a[k1 + 3] = -a[k1 + 3];
1483  }
1484  } else {
1485  for (k = 0; k < m; k++) {
1486  for (j = 0; j < k; j++) {
1487  j1 = 4 * j + ip[m + k];
1488  k1 = 4 * k + ip[m + j];
1489  xr = a[j1];
1490  xi = -a[j1 + 1];
1491  yr = a[k1];
1492  yi = -a[k1 + 1];
1493  a[j1] = yr;
1494  a[j1 + 1] = yi;
1495  a[k1] = xr;
1496  a[k1 + 1] = xi;
1497  j1 += nm;
1498  k1 += nm;
1499  xr = a[j1];
1500  xi = -a[j1 + 1];
1501  yr = a[k1];
1502  yi = -a[k1 + 1];
1503  a[j1] = yr;
1504  a[j1 + 1] = yi;
1505  a[k1] = xr;
1506  a[k1 + 1] = xi;
1507  j1 += nh;
1508  k1 += 2;
1509  xr = a[j1];
1510  xi = -a[j1 + 1];
1511  yr = a[k1];
1512  yi = -a[k1 + 1];
1513  a[j1] = yr;
1514  a[j1 + 1] = yi;
1515  a[k1] = xr;
1516  a[k1 + 1] = xi;
1517  j1 -= nm;
1518  k1 -= nm;
1519  xr = a[j1];
1520  xi = -a[j1 + 1];
1521  yr = a[k1];
1522  yi = -a[k1 + 1];
1523  a[j1] = yr;
1524  a[j1 + 1] = yi;
1525  a[k1] = xr;
1526  a[k1 + 1] = xi;
1527  j1 += 2;
1528  k1 += nh;
1529  xr = a[j1];
1530  xi = -a[j1 + 1];
1531  yr = a[k1];
1532  yi = -a[k1 + 1];
1533  a[j1] = yr;
1534  a[j1 + 1] = yi;
1535  a[k1] = xr;
1536  a[k1 + 1] = xi;
1537  j1 += nm;
1538  k1 += nm;
1539  xr = a[j1];
1540  xi = -a[j1 + 1];
1541  yr = a[k1];
1542  yi = -a[k1 + 1];
1543  a[j1] = yr;
1544  a[j1 + 1] = yi;
1545  a[k1] = xr;
1546  a[k1 + 1] = xi;
1547  j1 -= nh;
1548  k1 -= 2;
1549  xr = a[j1];
1550  xi = -a[j1 + 1];
1551  yr = a[k1];
1552  yi = -a[k1 + 1];
1553  a[j1] = yr;
1554  a[j1 + 1] = yi;
1555  a[k1] = xr;
1556  a[k1 + 1] = xi;
1557  j1 -= nm;
1558  k1 -= nm;
1559  xr = a[j1];
1560  xi = -a[j1 + 1];
1561  yr = a[k1];
1562  yi = -a[k1 + 1];
1563  a[j1] = yr;
1564  a[j1 + 1] = yi;
1565  a[k1] = xr;
1566  a[k1 + 1] = xi;
1567  }
1568  k1 = 4 * k + ip[m + k];
1569  j1 = k1 + 2;
1570  k1 += nh;
1571  a[j1 - 1] = -a[j1 - 1];
1572  xr = a[j1];
1573  xi = -a[j1 + 1];
1574  yr = a[k1];
1575  yi = -a[k1 + 1];
1576  a[j1] = yr;
1577  a[j1 + 1] = yi;
1578  a[k1] = xr;
1579  a[k1 + 1] = xi;
1580  a[k1 + 3] = -a[k1 + 3];
1581  j1 += nm;
1582  k1 += nm;
1583  a[j1 - 1] = -a[j1 - 1];
1584  xr = a[j1];
1585  xi = -a[j1 + 1];
1586  yr = a[k1];
1587  yi = -a[k1 + 1];
1588  a[j1] = yr;
1589  a[j1 + 1] = yi;
1590  a[k1] = xr;
1591  a[k1 + 1] = xi;
1592  a[k1 + 3] = -a[k1 + 3];
1593  }
1594  }
1595 }
1596 
1597 
1598 extern "C" void bitrv216(double *a)
1599 {
1600  double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1601  x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
1602  x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1603 
1604  x1r = a[2];
1605  x1i = a[3];
1606  x2r = a[4];
1607  x2i = a[5];
1608  x3r = a[6];
1609  x3i = a[7];
1610  x4r = a[8];
1611  x4i = a[9];
1612  x5r = a[10];
1613  x5i = a[11];
1614  x7r = a[14];
1615  x7i = a[15];
1616  x8r = a[16];
1617  x8i = a[17];
1618  x10r = a[20];
1619  x10i = a[21];
1620  x11r = a[22];
1621  x11i = a[23];
1622  x12r = a[24];
1623  x12i = a[25];
1624  x13r = a[26];
1625  x13i = a[27];
1626  x14r = a[28];
1627  x14i = a[29];
1628  a[2] = x8r;
1629  a[3] = x8i;
1630  a[4] = x4r;
1631  a[5] = x4i;
1632  a[6] = x12r;
1633  a[7] = x12i;
1634  a[8] = x2r;
1635  a[9] = x2i;
1636  a[10] = x10r;
1637  a[11] = x10i;
1638  a[14] = x14r;
1639  a[15] = x14i;
1640  a[16] = x1r;
1641  a[17] = x1i;
1642  a[20] = x5r;
1643  a[21] = x5i;
1644  a[22] = x13r;
1645  a[23] = x13i;
1646  a[24] = x3r;
1647  a[25] = x3i;
1648  a[26] = x11r;
1649  a[27] = x11i;
1650  a[28] = x7r;
1651  a[29] = x7i;
1652 }
1653 
1654 
1655 extern "C" void bitrv216neg(double *a)
1656 {
1657  double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1658  x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
1659  x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
1660  x13r, x13i, x14r, x14i, x15r, x15i;
1661 
1662  x1r = a[2];
1663  x1i = a[3];
1664  x2r = a[4];
1665  x2i = a[5];
1666  x3r = a[6];
1667  x3i = a[7];
1668  x4r = a[8];
1669  x4i = a[9];
1670  x5r = a[10];
1671  x5i = a[11];
1672  x6r = a[12];
1673  x6i = a[13];
1674  x7r = a[14];
1675  x7i = a[15];
1676  x8r = a[16];
1677  x8i = a[17];
1678  x9r = a[18];
1679  x9i = a[19];
1680  x10r = a[20];
1681  x10i = a[21];
1682  x11r = a[22];
1683  x11i = a[23];
1684  x12r = a[24];
1685  x12i = a[25];
1686  x13r = a[26];
1687  x13i = a[27];
1688  x14r = a[28];
1689  x14i = a[29];
1690  x15r = a[30];
1691  x15i = a[31];
1692  a[2] = x15r;
1693  a[3] = x15i;
1694  a[4] = x7r;
1695  a[5] = x7i;
1696  a[6] = x11r;
1697  a[7] = x11i;
1698  a[8] = x3r;
1699  a[9] = x3i;
1700  a[10] = x13r;
1701  a[11] = x13i;
1702  a[12] = x5r;
1703  a[13] = x5i;
1704  a[14] = x9r;
1705  a[15] = x9i;
1706  a[16] = x1r;
1707  a[17] = x1i;
1708  a[18] = x14r;
1709  a[19] = x14i;
1710  a[20] = x6r;
1711  a[21] = x6i;
1712  a[22] = x10r;
1713  a[23] = x10i;
1714  a[24] = x2r;
1715  a[25] = x2i;
1716  a[26] = x12r;
1717  a[27] = x12i;
1718  a[28] = x4r;
1719  a[29] = x4i;
1720  a[30] = x8r;
1721  a[31] = x8i;
1722 }
1723 
1724 
1725 extern "C" void bitrv208(double *a)
1726 {
1727  double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1728 
1729  x1r = a[2];
1730  x1i = a[3];
1731  x3r = a[6];
1732  x3i = a[7];
1733  x4r = a[8];
1734  x4i = a[9];
1735  x6r = a[12];
1736  x6i = a[13];
1737  a[2] = x4r;
1738  a[3] = x4i;
1739  a[6] = x6r;
1740  a[7] = x6i;
1741  a[8] = x1r;
1742  a[9] = x1i;
1743  a[12] = x3r;
1744  a[13] = x3i;
1745 }
1746 
1747 
1748 extern "C" void bitrv208neg(double *a)
1749 {
1750  double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1751  x5r, x5i, x6r, x6i, x7r, x7i;
1752 
1753  x1r = a[2];
1754  x1i = a[3];
1755  x2r = a[4];
1756  x2i = a[5];
1757  x3r = a[6];
1758  x3i = a[7];
1759  x4r = a[8];
1760  x4i = a[9];
1761  x5r = a[10];
1762  x5i = a[11];
1763  x6r = a[12];
1764  x6i = a[13];
1765  x7r = a[14];
1766  x7i = a[15];
1767  a[2] = x7r;
1768  a[3] = x7i;
1769  a[4] = x3r;
1770  a[5] = x3i;
1771  a[6] = x5r;
1772  a[7] = x5i;
1773  a[8] = x1r;
1774  a[9] = x1i;
1775  a[10] = x6r;
1776  a[11] = x6i;
1777  a[12] = x2r;
1778  a[13] = x2i;
1779  a[14] = x4r;
1780  a[15] = x4i;
1781 }
1782 
1783 
1784 extern "C" void cftf1st(int n, double *a, double *w)
1785 {
1786  int j, j0, j1, j2, j3, k, m, mh;
1787  double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1788  wd1r, wd1i, wd3r, wd3i;
1789  double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1790  y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1791 
1792  mh = n >> 3;
1793  m = 2 * mh;
1794  j1 = m;
1795  j2 = j1 + m;
1796  j3 = j2 + m;
1797  x0r = a[0] + a[j2];
1798  x0i = a[1] + a[j2 + 1];
1799  x1r = a[0] - a[j2];
1800  x1i = a[1] - a[j2 + 1];
1801  x2r = a[j1] + a[j3];
1802  x2i = a[j1 + 1] + a[j3 + 1];
1803  x3r = a[j1] - a[j3];
1804  x3i = a[j1 + 1] - a[j3 + 1];
1805  a[0] = x0r + x2r;
1806  a[1] = x0i + x2i;
1807  a[j1] = x0r - x2r;
1808  a[j1 + 1] = x0i - x2i;
1809  a[j2] = x1r - x3i;
1810  a[j2 + 1] = x1i + x3r;
1811  a[j3] = x1r + x3i;
1812  a[j3 + 1] = x1i - x3r;
1813  wn4r = w[1];
1814  csc1 = w[2];
1815  csc3 = w[3];
1816  wd1r = 1;
1817  wd1i = 0;
1818  wd3r = 1;
1819  wd3i = 0;
1820  k = 0;
1821  for (j = 2; j < mh - 2; j += 4) {
1822  k += 4;
1823  wk1r = csc1 * (wd1r + w[k]);
1824  wk1i = csc1 * (wd1i + w[k + 1]);
1825  wk3r = csc3 * (wd3r + w[k + 2]);
1826  wk3i = csc3 * (wd3i + w[k + 3]);
1827  wd1r = w[k];
1828  wd1i = w[k + 1];
1829  wd3r = w[k + 2];
1830  wd3i = w[k + 3];
1831  j1 = j + m;
1832  j2 = j1 + m;
1833  j3 = j2 + m;
1834  x0r = a[j] + a[j2];
1835  x0i = a[j + 1] + a[j2 + 1];
1836  x1r = a[j] - a[j2];
1837  x1i = a[j + 1] - a[j2 + 1];
1838  y0r = a[j + 2] + a[j2 + 2];
1839  y0i = a[j + 3] + a[j2 + 3];
1840  y1r = a[j + 2] - a[j2 + 2];
1841  y1i = a[j + 3] - a[j2 + 3];
1842  x2r = a[j1] + a[j3];
1843  x2i = a[j1 + 1] + a[j3 + 1];
1844  x3r = a[j1] - a[j3];
1845  x3i = a[j1 + 1] - a[j3 + 1];
1846  y2r = a[j1 + 2] + a[j3 + 2];
1847  y2i = a[j1 + 3] + a[j3 + 3];
1848  y3r = a[j1 + 2] - a[j3 + 2];
1849  y3i = a[j1 + 3] - a[j3 + 3];
1850  a[j] = x0r + x2r;
1851  a[j + 1] = x0i + x2i;
1852  a[j + 2] = y0r + y2r;
1853  a[j + 3] = y0i + y2i;
1854  a[j1] = x0r - x2r;
1855  a[j1 + 1] = x0i - x2i;
1856  a[j1 + 2] = y0r - y2r;
1857  a[j1 + 3] = y0i - y2i;
1858  x0r = x1r - x3i;
1859  x0i = x1i + x3r;
1860  a[j2] = wk1r * x0r - wk1i * x0i;
1861  a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1862  x0r = y1r - y3i;
1863  x0i = y1i + y3r;
1864  a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1865  a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1866  x0r = x1r + x3i;
1867  x0i = x1i - x3r;
1868  a[j3] = wk3r * x0r + wk3i * x0i;
1869  a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1870  x0r = y1r + y3i;
1871  x0i = y1i - y3r;
1872  a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1873  a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1874  j0 = m - j;
1875  j1 = j0 + m;
1876  j2 = j1 + m;
1877  j3 = j2 + m;
1878  x0r = a[j0] + a[j2];
1879  x0i = a[j0 + 1] + a[j2 + 1];
1880  x1r = a[j0] - a[j2];
1881  x1i = a[j0 + 1] - a[j2 + 1];
1882  y0r = a[j0 - 2] + a[j2 - 2];
1883  y0i = a[j0 - 1] + a[j2 - 1];
1884  y1r = a[j0 - 2] - a[j2 - 2];
1885  y1i = a[j0 - 1] - a[j2 - 1];
1886  x2r = a[j1] + a[j3];
1887  x2i = a[j1 + 1] + a[j3 + 1];
1888  x3r = a[j1] - a[j3];
1889  x3i = a[j1 + 1] - a[j3 + 1];
1890  y2r = a[j1 - 2] + a[j3 - 2];
1891  y2i = a[j1 - 1] + a[j3 - 1];
1892  y3r = a[j1 - 2] - a[j3 - 2];
1893  y3i = a[j1 - 1] - a[j3 - 1];
1894  a[j0] = x0r + x2r;
1895  a[j0 + 1] = x0i + x2i;
1896  a[j0 - 2] = y0r + y2r;
1897  a[j0 - 1] = y0i + y2i;
1898  a[j1] = x0r - x2r;
1899  a[j1 + 1] = x0i - x2i;
1900  a[j1 - 2] = y0r - y2r;
1901  a[j1 - 1] = y0i - y2i;
1902  x0r = x1r - x3i;
1903  x0i = x1i + x3r;
1904  a[j2] = wk1i * x0r - wk1r * x0i;
1905  a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1906  x0r = y1r - y3i;
1907  x0i = y1i + y3r;
1908  a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1909  a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1910  x0r = x1r + x3i;
1911  x0i = x1i - x3r;
1912  a[j3] = wk3i * x0r + wk3r * x0i;
1913  a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1914  x0r = y1r + y3i;
1915  x0i = y1i - y3r;
1916  a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1917  a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1918  }
1919  wk1r = csc1 * (wd1r + wn4r);
1920  wk1i = csc1 * (wd1i + wn4r);
1921  wk3r = csc3 * (wd3r - wn4r);
1922  wk3i = csc3 * (wd3i - wn4r);
1923  j0 = mh;
1924  j1 = j0 + m;
1925  j2 = j1 + m;
1926  j3 = j2 + m;
1927  x0r = a[j0 - 2] + a[j2 - 2];
1928  x0i = a[j0 - 1] + a[j2 - 1];
1929  x1r = a[j0 - 2] - a[j2 - 2];
1930  x1i = a[j0 - 1] - a[j2 - 1];
1931  x2r = a[j1 - 2] + a[j3 - 2];
1932  x2i = a[j1 - 1] + a[j3 - 1];
1933  x3r = a[j1 - 2] - a[j3 - 2];
1934  x3i = a[j1 - 1] - a[j3 - 1];
1935  a[j0 - 2] = x0r + x2r;
1936  a[j0 - 1] = x0i + x2i;
1937  a[j1 - 2] = x0r - x2r;
1938  a[j1 - 1] = x0i - x2i;
1939  x0r = x1r - x3i;
1940  x0i = x1i + x3r;
1941  a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1942  a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1943  x0r = x1r + x3i;
1944  x0i = x1i - x3r;
1945  a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1946  a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1947  x0r = a[j0] + a[j2];
1948  x0i = a[j0 + 1] + a[j2 + 1];
1949  x1r = a[j0] - a[j2];
1950  x1i = a[j0 + 1] - a[j2 + 1];
1951  x2r = a[j1] + a[j3];
1952  x2i = a[j1 + 1] + a[j3 + 1];
1953  x3r = a[j1] - a[j3];
1954  x3i = a[j1 + 1] - a[j3 + 1];
1955  a[j0] = x0r + x2r;
1956  a[j0 + 1] = x0i + x2i;
1957  a[j1] = x0r - x2r;
1958  a[j1 + 1] = x0i - x2i;
1959  x0r = x1r - x3i;
1960  x0i = x1i + x3r;
1961  a[j2] = wn4r * (x0r - x0i);
1962  a[j2 + 1] = wn4r * (x0i + x0r);
1963  x0r = x1r + x3i;
1964  x0i = x1i - x3r;
1965  a[j3] = -wn4r * (x0r + x0i);
1966  a[j3 + 1] = -wn4r * (x0i - x0r);
1967  x0r = a[j0 + 2] + a[j2 + 2];
1968  x0i = a[j0 + 3] + a[j2 + 3];
1969  x1r = a[j0 + 2] - a[j2 + 2];
1970  x1i = a[j0 + 3] - a[j2 + 3];
1971  x2r = a[j1 + 2] + a[j3 + 2];
1972  x2i = a[j1 + 3] + a[j3 + 3];
1973  x3r = a[j1 + 2] - a[j3 + 2];
1974  x3i = a[j1 + 3] - a[j3 + 3];
1975  a[j0 + 2] = x0r + x2r;
1976  a[j0 + 3] = x0i + x2i;
1977  a[j1 + 2] = x0r - x2r;
1978  a[j1 + 3] = x0i - x2i;
1979  x0r = x1r - x3i;
1980  x0i = x1i + x3r;
1981  a[j2 + 2] = wk1i * x0r - wk1r * x0i;
1982  a[j2 + 3] = wk1i * x0i + wk1r * x0r;
1983  x0r = x1r + x3i;
1984  x0i = x1i - x3r;
1985  a[j3 + 2] = wk3i * x0r + wk3r * x0i;
1986  a[j3 + 3] = wk3i * x0i - wk3r * x0r;
1987 }
1988 
1989 
1990 extern "C" void cftb1st(int n, double *a, double *w)
1991 {
1992  int j, j0, j1, j2, j3, k, m, mh;
1993  double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1994  wd1r, wd1i, wd3r, wd3i;
1995  double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1996  y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1997 
1998  mh = n >> 3;
1999  m = 2 * mh;
2000  j1 = m;
2001  j2 = j1 + m;
2002  j3 = j2 + m;
2003  x0r = a[0] + a[j2];
2004  x0i = -a[1] - a[j2 + 1];
2005  x1r = a[0] - a[j2];
2006  x1i = -a[1] + a[j2 + 1];
2007  x2r = a[j1] + a[j3];
2008  x2i = a[j1 + 1] + a[j3 + 1];
2009  x3r = a[j1] - a[j3];
2010  x3i = a[j1 + 1] - a[j3 + 1];
2011  a[0] = x0r + x2r;
2012  a[1] = x0i - x2i;
2013  a[j1] = x0r - x2r;
2014  a[j1 + 1] = x0i + x2i;
2015  a[j2] = x1r + x3i;
2016  a[j2 + 1] = x1i + x3r;
2017  a[j3] = x1r - x3i;
2018  a[j3 + 1] = x1i - x3r;
2019  wn4r = w[1];
2020  csc1 = w[2];
2021  csc3 = w[3];
2022  wd1r = 1;
2023  wd1i = 0;
2024  wd3r = 1;
2025  wd3i = 0;
2026  k = 0;
2027  for (j = 2; j < mh - 2; j += 4) {
2028  k += 4;
2029  wk1r = csc1 * (wd1r + w[k]);
2030  wk1i = csc1 * (wd1i + w[k + 1]);
2031  wk3r = csc3 * (wd3r + w[k + 2]);
2032  wk3i = csc3 * (wd3i + w[k + 3]);
2033  wd1r = w[k];
2034  wd1i = w[k + 1];
2035  wd3r = w[k + 2];
2036  wd3i = w[k + 3];
2037  j1 = j + m;
2038  j2 = j1 + m;
2039  j3 = j2 + m;
2040  x0r = a[j] + a[j2];
2041  x0i = -a[j + 1] - a[j2 + 1];
2042  x1r = a[j] - a[j2];
2043  x1i = -a[j + 1] + a[j2 + 1];
2044  y0r = a[j + 2] + a[j2 + 2];
2045  y0i = -a[j + 3] - a[j2 + 3];
2046  y1r = a[j + 2] - a[j2 + 2];
2047  y1i = -a[j + 3] + a[j2 + 3];
2048  x2r = a[j1] + a[j3];
2049  x2i = a[j1 + 1] + a[j3 + 1];
2050  x3r = a[j1] - a[j3];
2051  x3i = a[j1 + 1] - a[j3 + 1];
2052  y2r = a[j1 + 2] + a[j3 + 2];
2053  y2i = a[j1 + 3] + a[j3 + 3];
2054  y3r = a[j1 + 2] - a[j3 + 2];
2055  y3i = a[j1 + 3] - a[j3 + 3];
2056  a[j] = x0r + x2r;
2057  a[j + 1] = x0i - x2i;
2058  a[j + 2] = y0r + y2r;
2059  a[j + 3] = y0i - y2i;
2060  a[j1] = x0r - x2r;
2061  a[j1 + 1] = x0i + x2i;
2062  a[j1 + 2] = y0r - y2r;
2063  a[j1 + 3] = y0i + y2i;
2064  x0r = x1r + x3i;
2065  x0i = x1i + x3r;
2066  a[j2] = wk1r * x0r - wk1i * x0i;
2067  a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2068  x0r = y1r + y3i;
2069  x0i = y1i + y3r;
2070  a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2071  a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2072  x0r = x1r - x3i;
2073  x0i = x1i - x3r;
2074  a[j3] = wk3r * x0r + wk3i * x0i;
2075  a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2076  x0r = y1r - y3i;
2077  x0i = y1i - y3r;
2078  a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2079  a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2080  j0 = m - j;
2081  j1 = j0 + m;
2082  j2 = j1 + m;
2083  j3 = j2 + m;
2084  x0r = a[j0] + a[j2];
2085  x0i = -a[j0 + 1] - a[j2 + 1];
2086  x1r = a[j0] - a[j2];
2087  x1i = -a[j0 + 1] + a[j2 + 1];
2088  y0r = a[j0 - 2] + a[j2 - 2];
2089  y0i = -a[j0 - 1] - a[j2 - 1];
2090  y1r = a[j0 - 2] - a[j2 - 2];
2091  y1i = -a[j0 - 1] + a[j2 - 1];
2092  x2r = a[j1] + a[j3];
2093  x2i = a[j1 + 1] + a[j3 + 1];
2094  x3r = a[j1] - a[j3];
2095  x3i = a[j1 + 1] - a[j3 + 1];
2096  y2r = a[j1 - 2] + a[j3 - 2];
2097  y2i = a[j1 - 1] + a[j3 - 1];
2098  y3r = a[j1 - 2] - a[j3 - 2];
2099  y3i = a[j1 - 1] - a[j3 - 1];
2100  a[j0] = x0r + x2r;
2101  a[j0 + 1] = x0i - x2i;
2102  a[j0 - 2] = y0r + y2r;
2103  a[j0 - 1] = y0i - y2i;
2104  a[j1] = x0r - x2r;
2105  a[j1 + 1] = x0i + x2i;
2106  a[j1 - 2] = y0r - y2r;
2107  a[j1 - 1] = y0i + y2i;
2108  x0r = x1r + x3i;
2109  x0i = x1i + x3r;
2110  a[j2] = wk1i * x0r - wk1r * x0i;
2111  a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2112  x0r = y1r + y3i;
2113  x0i = y1i + y3r;
2114  a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2115  a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2116  x0r = x1r - x3i;
2117  x0i = x1i - x3r;
2118  a[j3] = wk3i * x0r + wk3r * x0i;
2119  a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2120  x0r = y1r - y3i;
2121  x0i = y1i - y3r;
2122  a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2123  a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2124  }
2125  wk1r = csc1 * (wd1r + wn4r);
2126  wk1i = csc1 * (wd1i + wn4r);
2127  wk3r = csc3 * (wd3r - wn4r);
2128  wk3i = csc3 * (wd3i - wn4r);
2129  j0 = mh;
2130  j1 = j0 + m;
2131  j2 = j1 + m;
2132  j3 = j2 + m;
2133  x0r = a[j0 - 2] + a[j2 - 2];
2134  x0i = -a[j0 - 1] - a[j2 - 1];
2135  x1r = a[j0 - 2] - a[j2 - 2];
2136  x1i = -a[j0 - 1] + a[j2 - 1];
2137  x2r = a[j1 - 2] + a[j3 - 2];
2138  x2i = a[j1 - 1] + a[j3 - 1];
2139  x3r = a[j1 - 2] - a[j3 - 2];
2140  x3i = a[j1 - 1] - a[j3 - 1];
2141  a[j0 - 2] = x0r + x2r;
2142  a[j0 - 1] = x0i - x2i;
2143  a[j1 - 2] = x0r - x2r;
2144  a[j1 - 1] = x0i + x2i;
2145  x0r = x1r + x3i;
2146  x0i = x1i + x3r;
2147  a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2148  a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2149  x0r = x1r - x3i;
2150  x0i = x1i - x3r;
2151  a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2152  a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2153  x0r = a[j0] + a[j2];
2154  x0i = -a[j0 + 1] - a[j2 + 1];
2155  x1r = a[j0] - a[j2];
2156  x1i = -a[j0 + 1] + a[j2 + 1];
2157  x2r = a[j1] + a[j3];
2158  x2i = a[j1 + 1] + a[j3 + 1];
2159  x3r = a[j1] - a[j3];
2160  x3i = a[j1 + 1] - a[j3 + 1];
2161  a[j0] = x0r + x2r;
2162  a[j0 + 1] = x0i - x2i;
2163  a[j1] = x0r - x2r;
2164  a[j1 + 1] = x0i + x2i;
2165  x0r = x1r + x3i;
2166  x0i = x1i + x3r;
2167  a[j2] = wn4r * (x0r - x0i);
2168  a[j2 + 1] = wn4r * (x0i + x0r);
2169  x0r = x1r - x3i;
2170  x0i = x1i - x3r;
2171  a[j3] = -wn4r * (x0r + x0i);
2172  a[j3 + 1] = -wn4r * (x0i - x0r);
2173  x0r = a[j0 + 2] + a[j2 + 2];
2174  x0i = -a[j0 + 3] - a[j2 + 3];
2175  x1r = a[j0 + 2] - a[j2 + 2];
2176  x1i = -a[j0 + 3] + a[j2 + 3];
2177  x2r = a[j1 + 2] + a[j3 + 2];
2178  x2i = a[j1 + 3] + a[j3 + 3];
2179  x3r = a[j1 + 2] - a[j3 + 2];
2180  x3i = a[j1 + 3] - a[j3 + 3];
2181  a[j0 + 2] = x0r + x2r;
2182  a[j0 + 3] = x0i - x2i;
2183  a[j1 + 2] = x0r - x2r;
2184  a[j1 + 3] = x0i + x2i;
2185  x0r = x1r + x3i;
2186  x0i = x1i + x3r;
2187  a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2188  a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2189  x0r = x1r - x3i;
2190  x0i = x1i - x3r;
2191  a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2192  a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2193 }
2194 
2195 
2196 #ifdef USE_CDFT_THREADS
2197 struct cdft_arg_st {
2198  int n0;
2199  int n;
2200  double *a;
2201  int nw;
2202  double *w;
2203 };
2204 typedef struct cdft_arg_st cdft_arg_t;
2205 
2206 
2207 extern "C" void cftrec4_th(int n, double *a, int nw, double *w)
2208 {
2209  void *cftrec1_th(void *p);
2210  void *cftrec2_th(void *p);
2211  int i, idiv4, m, nthread;
2212  cdft_thread_t th[4];
2213  cdft_arg_t ag[4];
2214 
2215  nthread = 2;
2216  idiv4 = 0;
2217  m = n >> 1;
2218  if (n > CDFT_4THREADS_BEGIN_N) {
2219  nthread = 4;
2220  idiv4 = 1;
2221  m >>= 1;
2222  }
2223  for (i = 0; i < nthread; i++) {
2224  ag[i].n0 = n;
2225  ag[i].n = m;
2226  ag[i].a = &a[i * m];
2227  ag[i].nw = nw;
2228  ag[i].w = w;
2229  if (i != idiv4) {
2230  cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2231  } else {
2232  cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2233  }
2234  }
2235  for (i = 0; i < nthread; i++) {
2236  cdft_thread_wait(th[i]);
2237  }
2238 }
2239 
2240 
2241 extern "C" void *cftrec1_th(void *p)
2242 {
2243  int cfttree(int n, int j, int k, double *a, int nw, double *w);
2244  void cftleaf(int n, int isplt, double *a, int nw, double *w);
2245  void cftmdl1(int n, double *a, double *w);
2246  int isplt, j, k, m, n, n0, nw;
2247  double *a, *w;
2248 
2249  n0 = ((cdft_arg_t *) p)->n0;
2250  n = ((cdft_arg_t *) p)->n;
2251  a = ((cdft_arg_t *) p)->a;
2252  nw = ((cdft_arg_t *) p)->nw;
2253  w = ((cdft_arg_t *) p)->w;
2254  m = n0;
2255  while (m > 512) {
2256  m >>= 2;
2257  cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2258  }
2259  cftleaf(m, 1, &a[n - m], nw, w);
2260  k = 0;
2261  for (j = n - m; j > 0; j -= m) {
2262  k++;
2263  isplt = cfttree(m, j, k, a, nw, w);
2264  cftleaf(m, isplt, &a[j - m], nw, w);
2265  }
2266  return (void *) 0;
2267 }
2268 
2269 
2270 extern "C" void *cftrec2_th(void *p)
2271 {
2272  int cfttree(int n, int j, int k, double *a, int nw, double *w);
2273  void cftleaf(int n, int isplt, double *a, int nw, double *w);
2274  void cftmdl2(int n, double *a, double *w);
2275  int isplt, j, k, m, n, n0, nw;
2276  double *a, *w;
2277 
2278  n0 = ((cdft_arg_t *) p)->n0;
2279  n = ((cdft_arg_t *) p)->n;
2280  a = ((cdft_arg_t *) p)->a;
2281  nw = ((cdft_arg_t *) p)->nw;
2282  w = ((cdft_arg_t *) p)->w;
2283  k = 1;
2284  m = n0;
2285  while (m > 512) {
2286  m >>= 2;
2287  k <<= 2;
2288  cftmdl2(m, &a[n - m], &w[nw - m]);
2289  }
2290  cftleaf(m, 0, &a[n - m], nw, w);
2291  k >>= 1;
2292  for (j = n - m; j > 0; j -= m) {
2293  k++;
2294  isplt = cfttree(m, j, k, a, nw, w);
2295  cftleaf(m, isplt, &a[j - m], nw, w);
2296  }
2297  return (void *) 0;
2298 }
2299 #endif /* USE_CDFT_THREADS */
2300 
2301 
2302 extern "C" void cftrec4(int n, double *a, int nw, double *w)
2303 {
2304  int cfttree(int n, int j, int k, double *a, int nw, double *w);
2305  void cftleaf(int n, int isplt, double *a, int nw, double *w);
2306  void cftmdl1(int n, double *a, double *w);
2307  int isplt, j, k, m;
2308 
2309  m = n;
2310  while (m > 512) {
2311  m >>= 2;
2312  cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2313  }
2314  cftleaf(m, 1, &a[n - m], nw, w);
2315  k = 0;
2316  for (j = n - m; j > 0; j -= m) {
2317  k++;
2318  isplt = cfttree(m, j, k, a, nw, w);
2319  cftleaf(m, isplt, &a[j - m], nw, w);
2320  }
2321 }
2322 
2323 
2324 extern "C" int cfttree(int n, int j, int k, double *a, int nw, double *w)
2325 {
2326  void cftmdl1(int n, double *a, double *w);
2327  void cftmdl2(int n, double *a, double *w);
2328  int i, isplt, m;
2329 
2330  if ((k & 3) != 0) {
2331  isplt = k & 1;
2332  if (isplt != 0) {
2333  cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2334  } else {
2335  cftmdl2(n, &a[j - n], &w[nw - n]);
2336  }
2337  } else {
2338  m = n;
2339  for (i = k; (i & 3) == 0; i >>= 2) {
2340  m <<= 2;
2341  }
2342  isplt = i & 1;
2343  if (isplt != 0) {
2344  while (m > 128) {
2345  cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2346  m >>= 2;
2347  }
2348  } else {
2349  while (m > 128) {
2350  cftmdl2(m, &a[j - m], &w[nw - m]);
2351  m >>= 2;
2352  }
2353  }
2354  }
2355  return isplt;
2356 }
2357 
2358 
2359 extern "C" void cftleaf(int n, int isplt, double *a, int nw, double *w)
2360 {
2361  void cftmdl1(int n, double *a, double *w);
2362  void cftmdl2(int n, double *a, double *w);
2363  void cftf161(double *a, double *w);
2364  void cftf162(double *a, double *w);
2365  void cftf081(double *a, double *w);
2366  void cftf082(double *a, double *w);
2367 
2368  if (n == 512) {
2369  cftmdl1(128, a, &w[nw - 64]);
2370  cftf161(a, &w[nw - 8]);
2371  cftf162(&a[32], &w[nw - 32]);
2372  cftf161(&a[64], &w[nw - 8]);
2373  cftf161(&a[96], &w[nw - 8]);
2374  cftmdl2(128, &a[128], &w[nw - 128]);
2375  cftf161(&a[128], &w[nw - 8]);
2376  cftf162(&a[160], &w[nw - 32]);
2377  cftf161(&a[192], &w[nw - 8]);
2378  cftf162(&a[224], &w[nw - 32]);
2379  cftmdl1(128, &a[256], &w[nw - 64]);
2380  cftf161(&a[256], &w[nw - 8]);
2381  cftf162(&a[288], &w[nw - 32]);
2382  cftf161(&a[320], &w[nw - 8]);
2383  cftf161(&a[352], &w[nw - 8]);
2384  if (isplt != 0) {
2385  cftmdl1(128, &a[384], &w[nw - 64]);
2386  cftf161(&a[480], &w[nw - 8]);
2387  } else {
2388  cftmdl2(128, &a[384], &w[nw - 128]);
2389  cftf162(&a[480], &w[nw - 32]);
2390  }
2391  cftf161(&a[384], &w[nw - 8]);
2392  cftf162(&a[416], &w[nw - 32]);
2393  cftf161(&a[448], &w[nw - 8]);
2394  } else {
2395  cftmdl1(64, a, &w[nw - 32]);
2396  cftf081(a, &w[nw - 8]);
2397  cftf082(&a[16], &w[nw - 8]);
2398  cftf081(&a[32], &w[nw - 8]);
2399  cftf081(&a[48], &w[nw - 8]);
2400  cftmdl2(64, &a[64], &w[nw - 64]);
2401  cftf081(&a[64], &w[nw - 8]);
2402  cftf082(&a[80], &w[nw - 8]);
2403  cftf081(&a[96], &w[nw - 8]);
2404  cftf082(&a[112], &w[nw - 8]);
2405  cftmdl1(64, &a[128], &w[nw - 32]);
2406  cftf081(&a[128], &w[nw - 8]);
2407  cftf082(&a[144], &w[nw - 8]);
2408  cftf081(&a[160], &w[nw - 8]);
2409  cftf081(&a[176], &w[nw - 8]);
2410  if (isplt != 0) {
2411  cftmdl1(64, &a[192], &w[nw - 32]);
2412  cftf081(&a[240], &w[nw - 8]);
2413  } else {
2414  cftmdl2(64, &a[192], &w[nw - 64]);
2415  cftf082(&a[240], &w[nw - 8]);
2416  }
2417  cftf081(&a[192], &w[nw - 8]);
2418  cftf082(&a[208], &w[nw - 8]);
2419  cftf081(&a[224], &w[nw - 8]);
2420  }
2421 }
2422 
2423 
2424 extern "C" void cftmdl1(int n, double *a, double *w)
2425 {
2426  int j, j0, j1, j2, j3, k, m, mh;
2427  double wn4r, wk1r, wk1i, wk3r, wk3i;
2428  double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2429 
2430  mh = n >> 3;
2431  m = 2 * mh;
2432  j1 = m;
2433  j2 = j1 + m;
2434  j3 = j2 + m;
2435  x0r = a[0] + a[j2];
2436  x0i = a[1] + a[j2 + 1];
2437  x1r = a[0] - a[j2];
2438  x1i = a[1] - a[j2 + 1];
2439  x2r = a[j1] + a[j3];
2440  x2i = a[j1 + 1] + a[j3 + 1];
2441  x3r = a[j1] - a[j3];
2442  x3i = a[j1 + 1] - a[j3 + 1];
2443  a[0] = x0r + x2r;
2444  a[1] = x0i + x2i;
2445  a[j1] = x0r - x2r;
2446  a[j1 + 1] = x0i - x2i;
2447  a[j2] = x1r - x3i;
2448  a[j2 + 1] = x1i + x3r;
2449  a[j3] = x1r + x3i;
2450  a[j3 + 1] = x1i - x3r;
2451  wn4r = w[1];
2452  k = 0;
2453  for (j = 2; j < mh; j += 2) {
2454  k += 4;
2455  wk1r = w[k];
2456  wk1i = w[k + 1];
2457  wk3r = w[k + 2];
2458  wk3i = w[k + 3];
2459  j1 = j + m;
2460  j2 = j1 + m;
2461  j3 = j2 + m;
2462  x0r = a[j] + a[j2];
2463  x0i = a[j + 1] + a[j2 + 1];
2464  x1r = a[j] - a[j2];
2465  x1i = a[j + 1] - a[j2 + 1];
2466  x2r = a[j1] + a[j3];
2467  x2i = a[j1 + 1] + a[j3 + 1];
2468  x3r = a[j1] - a[j3];
2469  x3i = a[j1 + 1] - a[j3 + 1];
2470  a[j] = x0r + x2r;
2471  a[j + 1] = x0i + x2i;
2472  a[j1] = x0r - x2r;
2473  a[j1 + 1] = x0i - x2i;
2474  x0r = x1r - x3i;
2475  x0i = x1i + x3r;
2476  a[j2] = wk1r * x0r - wk1i * x0i;
2477  a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2478  x0r = x1r + x3i;
2479  x0i = x1i - x3r;
2480  a[j3] = wk3r * x0r + wk3i * x0i;
2481  a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2482  j0 = m - j;
2483  j1 = j0 + m;
2484  j2 = j1 + m;
2485  j3 = j2 + m;
2486  x0r = a[j0] + a[j2];
2487  x0i = a[j0 + 1] + a[j2 + 1];
2488  x1r = a[j0] - a[j2];
2489  x1i = a[j0 + 1] - a[j2 + 1];
2490  x2r = a[j1] + a[j3];
2491  x2i = a[j1 + 1] + a[j3 + 1];
2492  x3r = a[j1] - a[j3];
2493  x3i = a[j1 + 1] - a[j3 + 1];
2494  a[j0] = x0r + x2r;
2495  a[j0 + 1] = x0i + x2i;
2496  a[j1] = x0r - x2r;
2497  a[j1 + 1] = x0i - x2i;
2498  x0r = x1r - x3i;
2499  x0i = x1i + x3r;
2500  a[j2] = wk1i * x0r - wk1r * x0i;
2501  a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2502  x0r = x1r + x3i;
2503  x0i = x1i - x3r;
2504  a[j3] = wk3i * x0r + wk3r * x0i;
2505  a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2506  }
2507  j0 = mh;
2508  j1 = j0 + m;
2509  j2 = j1 + m;
2510  j3 = j2 + m;
2511  x0r = a[j0] + a[j2];
2512  x0i = a[j0 + 1] + a[j2 + 1];
2513  x1r = a[j0] - a[j2];
2514  x1i = a[j0 + 1] - a[j2 + 1];
2515  x2r = a[j1] + a[j3];
2516  x2i = a[j1 + 1] + a[j3 + 1];
2517  x3r = a[j1] - a[j3];
2518  x3i = a[j1 + 1] - a[j3 + 1];
2519  a[j0] = x0r + x2r;
2520  a[j0 + 1] = x0i + x2i;
2521  a[j1] = x0r - x2r;
2522  a[j1 + 1] = x0i - x2i;
2523  x0r = x1r - x3i;
2524  x0i = x1i + x3r;
2525  a[j2] = wn4r * (x0r - x0i);
2526  a[j2 + 1] = wn4r * (x0i + x0r);
2527  x0r = x1r + x3i;
2528  x0i = x1i - x3r;
2529  a[j3] = -wn4r * (x0r + x0i);
2530  a[j3 + 1] = -wn4r * (x0i - x0r);
2531 }
2532 
2533 
2534 extern "C" void cftmdl2(int n, double *a, double *w)
2535 {
2536  int j, j0, j1, j2, j3, k, kr, m, mh;
2537  double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2538  double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2539 
2540  mh = n >> 3;
2541  m = 2 * mh;
2542  wn4r = w[1];
2543  j1 = m;
2544  j2 = j1 + m;
2545  j3 = j2 + m;
2546  x0r = a[0] - a[j2 + 1];
2547  x0i = a[1] + a[j2];
2548  x1r = a[0] + a[j2 + 1];
2549  x1i = a[1] - a[j2];
2550  x2r = a[j1] - a[j3 + 1];
2551  x2i = a[j1 + 1] + a[j3];
2552  x3r = a[j1] + a[j3 + 1];
2553  x3i = a[j1 + 1] - a[j3];
2554  y0r = wn4r * (x2r - x2i);
2555  y0i = wn4r * (x2i + x2r);
2556  a[0] = x0r + y0r;
2557  a[1] = x0i + y0i;
2558  a[j1] = x0r - y0r;
2559  a[j1 + 1] = x0i - y0i;
2560  y0r = wn4r * (x3r - x3i);
2561  y0i = wn4r * (x3i + x3r);
2562  a[j2] = x1r - y0i;
2563  a[j2 + 1] = x1i + y0r;
2564  a[j3] = x1r + y0i;
2565  a[j3 + 1] = x1i - y0r;
2566  k = 0;
2567  kr = 2 * m;
2568  for (j = 2; j < mh; j += 2) {
2569  k += 4;
2570  wk1r = w[k];
2571  wk1i = w[k + 1];
2572  wk3r = w[k + 2];
2573  wk3i = w[k + 3];
2574  kr -= 4;
2575  wd1i = w[kr];
2576  wd1r = w[kr + 1];
2577  wd3i = w[kr + 2];
2578  wd3r = w[kr + 3];
2579  j1 = j + m;
2580  j2 = j1 + m;
2581  j3 = j2 + m;
2582  x0r = a[j] - a[j2 + 1];
2583  x0i = a[j + 1] + a[j2];
2584  x1r = a[j] + a[j2 + 1];
2585  x1i = a[j + 1] - a[j2];
2586  x2r = a[j1] - a[j3 + 1];
2587  x2i = a[j1 + 1] + a[j3];
2588  x3r = a[j1] + a[j3 + 1];
2589  x3i = a[j1 + 1] - a[j3];
2590  y0r = wk1r * x0r - wk1i * x0i;
2591  y0i = wk1r * x0i + wk1i * x0r;
2592  y2r = wd1r * x2r - wd1i * x2i;
2593  y2i = wd1r * x2i + wd1i * x2r;
2594  a[j] = y0r + y2r;
2595  a[j + 1] = y0i + y2i;
2596  a[j1] = y0r - y2r;
2597  a[j1 + 1] = y0i - y2i;
2598  y0r = wk3r * x1r + wk3i * x1i;
2599  y0i = wk3r * x1i - wk3i * x1r;
2600  y2r = wd3r * x3r + wd3i * x3i;
2601  y2i = wd3r * x3i - wd3i * x3r;
2602  a[j2] = y0r + y2r;
2603  a[j2 + 1] = y0i + y2i;
2604  a[j3] = y0r - y2r;
2605  a[j3 + 1] = y0i - y2i;
2606  j0 = m - j;
2607  j1 = j0 + m;
2608  j2 = j1 + m;
2609  j3 = j2 + m;
2610  x0r = a[j0] - a[j2 + 1];
2611  x0i = a[j0 + 1] + a[j2];
2612  x1r = a[j0] + a[j2 + 1];
2613  x1i = a[j0 + 1] - a[j2];
2614  x2r = a[j1] - a[j3 + 1];
2615  x2i = a[j1 + 1] + a[j3];
2616  x3r = a[j1] + a[j3 + 1];
2617  x3i = a[j1 + 1] - a[j3];
2618  y0r = wd1i * x0r - wd1r * x0i;
2619  y0i = wd1i * x0i + wd1r * x0r;
2620  y2r = wk1i * x2r - wk1r * x2i;
2621  y2i = wk1i * x2i + wk1r * x2r;
2622  a[j0] = y0r + y2r;
2623  a[j0 + 1] = y0i + y2i;
2624  a[j1] = y0r - y2r;
2625  a[j1 + 1] = y0i - y2i;
2626  y0r = wd3i * x1r + wd3r * x1i;
2627  y0i = wd3i * x1i - wd3r * x1r;
2628  y2r = wk3i * x3r + wk3r * x3i;
2629  y2i = wk3i * x3i - wk3r * x3r;
2630  a[j2] = y0r + y2r;
2631  a[j2 + 1] = y0i + y2i;
2632  a[j3] = y0r - y2r;
2633  a[j3 + 1] = y0i - y2i;
2634  }
2635  wk1r = w[m];
2636  wk1i = w[m + 1];
2637  j0 = mh;
2638  j1 = j0 + m;
2639  j2 = j1 + m;
2640  j3 = j2 + m;
2641  x0r = a[j0] - a[j2 + 1];
2642  x0i = a[j0 + 1] + a[j2];
2643  x1r = a[j0] + a[j2 + 1];
2644  x1i = a[j0 + 1] - a[j2];
2645  x2r = a[j1] - a[j3 + 1];
2646  x2i = a[j1 + 1] + a[j3];
2647  x3r = a[j1] + a[j3 + 1];
2648  x3i = a[j1 + 1] - a[j3];
2649  y0r = wk1r * x0r - wk1i * x0i;
2650  y0i = wk1r * x0i + wk1i * x0r;
2651  y2r = wk1i * x2r - wk1r * x2i;
2652  y2i = wk1i * x2i + wk1r * x2r;
2653  a[j0] = y0r + y2r;
2654  a[j0 + 1] = y0i + y2i;
2655  a[j1] = y0r - y2r;
2656  a[j1 + 1] = y0i - y2i;
2657  y0r = wk1i * x1r - wk1r * x1i;
2658  y0i = wk1i * x1i + wk1r * x1r;
2659  y2r = wk1r * x3r - wk1i * x3i;
2660  y2i = wk1r * x3i + wk1i * x3r;
2661  a[j2] = y0r - y2r;
2662  a[j2 + 1] = y0i - y2i;
2663  a[j3] = y0r + y2r;
2664  a[j3 + 1] = y0i + y2i;
2665 }
2666 
2667 
2668 extern "C" void cftfx41(int n, double *a, int nw, double *w)
2669 {
2670  void cftf161(double *a, double *w);
2671  void cftf162(double *a, double *w);
2672  void cftf081(double *a, double *w);
2673  void cftf082(double *a, double *w);
2674 
2675  if (n == 128) {
2676  cftf161(a, &w[nw - 8]);
2677  cftf162(&a[32], &w[nw - 32]);
2678  cftf161(&a[64], &w[nw - 8]);
2679  cftf161(&a[96], &w[nw - 8]);
2680  } else {
2681  cftf081(a, &w[nw - 8]);
2682  cftf082(&a[16], &w[nw - 8]);
2683  cftf081(&a[32], &w[nw - 8]);
2684  cftf081(&a[48], &w[nw - 8]);
2685  }
2686 }
2687 
2688 
2689 extern "C" void cftf161(double *a, double *w)
2690 {
2691  double wn4r, wk1r, wk1i,
2692  x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2693  y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2694  y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2695  y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2696  y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2697 
2698  wn4r = w[1];
2699  wk1r = w[2];
2700  wk1i = w[3];
2701  x0r = a[0] + a[16];
2702  x0i = a[1] + a[17];
2703  x1r = a[0] - a[16];
2704  x1i = a[1] - a[17];
2705  x2r = a[8] + a[24];
2706  x2i = a[9] + a[25];
2707  x3r = a[8] - a[24];
2708  x3i = a[9] - a[25];
2709  y0r = x0r + x2r;
2710  y0i = x0i + x2i;
2711  y4r = x0r - x2r;
2712  y4i = x0i - x2i;
2713  y8r = x1r - x3i;
2714  y8i = x1i + x3r;
2715  y12r = x1r + x3i;
2716  y12i = x1i - x3r;
2717  x0r = a[2] + a[18];
2718  x0i = a[3] + a[19];
2719  x1r = a[2] - a[18];
2720  x1i = a[3] - a[19];
2721  x2r = a[10] + a[26];
2722  x2i = a[11] + a[27];
2723  x3r = a[10] - a[26];
2724  x3i = a[11] - a[27];
2725  y1r = x0r + x2r;
2726  y1i = x0i + x2i;
2727  y5r = x0r - x2r;
2728  y5i = x0i - x2i;
2729  x0r = x1r - x3i;
2730  x0i = x1i + x3r;
2731  y9r = wk1r * x0r - wk1i * x0i;
2732  y9i = wk1r * x0i + wk1i * x0r;
2733  x0r = x1r + x3i;
2734  x0i = x1i - x3r;
2735  y13r = wk1i * x0r - wk1r * x0i;
2736  y13i = wk1i * x0i + wk1r * x0r;
2737  x0r = a[4] + a[20];
2738  x0i = a[5] + a[21];
2739  x1r = a[4] - a[20];
2740  x1i = a[5] - a[21];
2741  x2r = a[12] + a[28];
2742  x2i = a[13] + a[29];
2743  x3r = a[12] - a[28];
2744  x3i = a[13] - a[29];
2745  y2r = x0r + x2r;
2746  y2i = x0i + x2i;
2747  y6r = x0r - x2r;
2748  y6i = x0i - x2i;
2749  x0r = x1r - x3i;
2750  x0i = x1i + x3r;
2751  y10r = wn4r * (x0r - x0i);
2752  y10i = wn4r * (x0i + x0r);
2753  x0r = x1r + x3i;
2754  x0i = x1i - x3r;
2755  y14r = wn4r * (x0r + x0i);
2756  y14i = wn4r * (x0i - x0r);
2757  x0r = a[6] + a[22];
2758  x0i = a[7] + a[23];
2759  x1r = a[6] - a[22];
2760  x1i = a[7] - a[23];
2761  x2r = a[14] + a[30];
2762  x2i = a[15] + a[31];
2763  x3r = a[14] - a[30];
2764  x3i = a[15] - a[31];
2765  y3r = x0r + x2r;
2766  y3i = x0i + x2i;
2767  y7r = x0r - x2r;
2768  y7i = x0i - x2i;
2769  x0r = x1r - x3i;
2770  x0i = x1i + x3r;
2771  y11r = wk1i * x0r - wk1r * x0i;
2772  y11i = wk1i * x0i + wk1r * x0r;
2773  x0r = x1r + x3i;
2774  x0i = x1i - x3r;
2775  y15r = wk1r * x0r - wk1i * x0i;
2776  y15i = wk1r * x0i + wk1i * x0r;
2777  x0r = y12r - y14r;
2778  x0i = y12i - y14i;
2779  x1r = y12r + y14r;
2780  x1i = y12i + y14i;
2781  x2r = y13r - y15r;
2782  x2i = y13i - y15i;
2783  x3r = y13r + y15r;
2784  x3i = y13i + y15i;
2785  a[24] = x0r + x2r;
2786  a[25] = x0i + x2i;
2787  a[26] = x0r - x2r;
2788  a[27] = x0i - x2i;
2789  a[28] = x1r - x3i;
2790  a[29] = x1i + x3r;
2791  a[30] = x1r + x3i;
2792  a[31] = x1i - x3r;
2793  x0r = y8r + y10r;
2794  x0i = y8i + y10i;
2795  x1r = y8r - y10r;
2796  x1i = y8i - y10i;
2797  x2r = y9r + y11r;
2798  x2i = y9i + y11i;
2799  x3r = y9r - y11r;
2800  x3i = y9i - y11i;
2801  a[16] = x0r + x2r;
2802  a[17] = x0i + x2i;
2803  a[18] = x0r - x2r;
2804  a[19] = x0i - x2i;
2805  a[20] = x1r - x3i;
2806  a[21] = x1i + x3r;
2807  a[22] = x1r + x3i;
2808  a[23] = x1i - x3r;
2809  x0r = y5r - y7i;
2810  x0i = y5i + y7r;
2811  x2r = wn4r * (x0r - x0i);
2812  x2i = wn4r * (x0i + x0r);
2813  x0r = y5r + y7i;
2814  x0i = y5i - y7r;
2815  x3r = wn4r * (x0r - x0i);
2816  x3i = wn4r * (x0i + x0r);
2817  x0r = y4r - y6i;
2818  x0i = y4i + y6r;
2819  x1r = y4r + y6i;
2820  x1i = y4i - y6r;
2821  a[8] = x0r + x2r;
2822  a[9] = x0i + x2i;
2823  a[10] = x0r - x2r;
2824  a[11] = x0i - x2i;
2825  a[12] = x1r - x3i;
2826  a[13] = x1i + x3r;
2827  a[14] = x1r + x3i;
2828  a[15] = x1i - x3r;
2829  x0r = y0r + y2r;
2830  x0i = y0i + y2i;
2831  x1r = y0r - y2r;
2832  x1i = y0i - y2i;
2833  x2r = y1r + y3r;
2834  x2i = y1i + y3i;
2835  x3r = y1r - y3r;
2836  x3i = y1i - y3i;
2837  a[0] = x0r + x2r;
2838  a[1] = x0i + x2i;
2839  a[2] = x0r - x2r;
2840  a[3] = x0i - x2i;
2841  a[4] = x1r - x3i;
2842  a[5] = x1i + x3r;
2843  a[6] = x1r + x3i;
2844  a[7] = x1i - x3r;
2845 }
2846 
2847 
2848 extern "C" void cftf162(double *a, double *w)
2849 {
2850  double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
2851  x0r, x0i, x1r, x1i, x2r, x2i,
2852  y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2853  y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2854  y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2855  y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2856 
2857  wn4r = w[1];
2858  wk1r = w[4];
2859  wk1i = w[5];
2860  wk3r = w[6];
2861  wk3i = -w[7];
2862  wk2r = w[8];
2863  wk2i = w[9];
2864  x1r = a[0] - a[17];
2865  x1i = a[1] + a[16];
2866  x0r = a[8] - a[25];
2867  x0i = a[9] + a[24];
2868  x2r = wn4r * (x0r - x0i);
2869  x2i = wn4r * (x0i + x0r);
2870  y0r = x1r + x2r;
2871  y0i = x1i + x2i;
2872  y4r = x1r - x2r;
2873  y4i = x1i - x2i;
2874  x1r = a[0] + a[17];
2875  x1i = a[1] - a[16];
2876  x0r = a[8] + a[25];
2877  x0i = a[9] - a[24];
2878  x2r = wn4r * (x0r - x0i);
2879  x2i = wn4r * (x0i + x0r);
2880  y8r = x1r - x2i;
2881  y8i = x1i + x2r;
2882  y12r = x1r + x2i;
2883  y12i = x1i - x2r;
2884  x0r = a[2] - a[19];
2885  x0i = a[3] + a[18];
2886  x1r = wk1r * x0r - wk1i * x0i;
2887  x1i = wk1r * x0i + wk1i * x0r;
2888  x0r = a[10] - a[27];
2889  x0i = a[11] + a[26];
2890  x2r = wk3i * x0r - wk3r * x0i;
2891  x2i = wk3i * x0i + wk3r * x0r;
2892  y1r = x1r + x2r;
2893  y1i = x1i + x2i;
2894  y5r = x1r - x2r;
2895  y5i = x1i - x2i;
2896  x0r = a[2] + a[19];
2897  x0i = a[3] - a[18];
2898  x1r = wk3r * x0r - wk3i * x0i;
2899  x1i = wk3r * x0i + wk3i * x0r;
2900  x0r = a[10] + a[27];
2901  x0i = a[11] - a[26];
2902  x2r = wk1r * x0r + wk1i * x0i;
2903  x2i = wk1r * x0i - wk1i * x0r;
2904  y9r = x1r - x2r;
2905  y9i = x1i - x2i;
2906  y13r = x1r + x2r;
2907  y13i = x1i + x2i;
2908  x0r = a[4] - a[21];
2909  x0i = a[5] + a[20];
2910  x1r = wk2r * x0r - wk2i * x0i;
2911  x1i = wk2r * x0i + wk2i * x0r;
2912  x0r = a[12] - a[29];
2913  x0i = a[13] + a[28];
2914  x2r = wk2i * x0r - wk2r * x0i;
2915  x2i = wk2i * x0i + wk2r * x0r;
2916  y2r = x1r + x2r;
2917  y2i = x1i + x2i;
2918  y6r = x1r - x2r;
2919  y6i = x1i - x2i;
2920  x0r = a[4] + a[21];
2921  x0i = a[5] - a[20];
2922  x1r = wk2i * x0r - wk2r * x0i;
2923  x1i = wk2i * x0i + wk2r * x0r;
2924  x0r = a[12] + a[29];
2925  x0i = a[13] - a[28];
2926  x2r = wk2r * x0r - wk2i * x0i;
2927  x2i = wk2r * x0i + wk2i * x0r;
2928  y10r = x1r - x2r;
2929  y10i = x1i - x2i;
2930  y14r = x1r + x2r;
2931  y14i = x1i + x2i;
2932  x0r = a[6] - a[23];
2933  x0i = a[7] + a[22];
2934  x1r = wk3r * x0r - wk3i * x0i;
2935  x1i = wk3r * x0i + wk3i * x0r;
2936  x0r = a[14] - a[31];
2937  x0i = a[15] + a[30];
2938  x2r = wk1i * x0r - wk1r * x0i;
2939  x2i = wk1i * x0i + wk1r * x0r;
2940  y3r = x1r + x2r;
2941  y3i = x1i + x2i;
2942  y7r = x1r - x2r;
2943  y7i = x1i - x2i;
2944  x0r = a[6] + a[23];
2945  x0i = a[7] - a[22];
2946  x1r = wk1i * x0r + wk1r * x0i;
2947  x1i = wk1i * x0i - wk1r * x0r;
2948  x0r = a[14] + a[31];
2949  x0i = a[15] - a[30];
2950  x2r = wk3i * x0r - wk3r * x0i;
2951  x2i = wk3i * x0i + wk3r * x0r;
2952  y11r = x1r + x2r;
2953  y11i = x1i + x2i;
2954  y15r = x1r - x2r;
2955  y15i = x1i - x2i;
2956  x1r = y0r + y2r;
2957  x1i = y0i + y2i;
2958  x2r = y1r + y3r;
2959  x2i = y1i + y3i;
2960  a[0] = x1r + x2r;
2961  a[1] = x1i + x2i;
2962  a[2] = x1r - x2r;
2963  a[3] = x1i - x2i;
2964  x1r = y0r - y2r;
2965  x1i = y0i - y2i;
2966  x2r = y1r - y3r;
2967  x2i = y1i - y3i;
2968  a[4] = x1r - x2i;
2969  a[5] = x1i + x2r;
2970  a[6] = x1r + x2i;
2971  a[7] = x1i - x2r;
2972  x1r = y4r - y6i;
2973  x1i = y4i + y6r;
2974  x0r = y5r - y7i;
2975  x0i = y5i + y7r;
2976  x2r = wn4r * (x0r - x0i);
2977  x2i = wn4r * (x0i + x0r);
2978  a[8] = x1r + x2r;
2979  a[9] = x1i + x2i;
2980  a[10] = x1r - x2r;
2981  a[11] = x1i - x2i;
2982  x1r = y4r + y6i;
2983  x1i = y4i - y6r;
2984  x0r = y5r + y7i;
2985  x0i = y5i - y7r;
2986  x2r = wn4r * (x0r - x0i);
2987  x2i = wn4r * (x0i + x0r);
2988  a[12] = x1r - x2i;
2989  a[13] = x1i + x2r;
2990  a[14] = x1r + x2i;
2991  a[15] = x1i - x2r;
2992  x1r = y8r + y10r;
2993  x1i = y8i + y10i;
2994  x2r = y9r - y11r;
2995  x2i = y9i - y11i;
2996  a[16] = x1r + x2r;
2997  a[17] = x1i + x2i;
2998  a[18] = x1r - x2r;
2999  a[19] = x1i - x2i;
3000  x1r = y8r - y10r;
3001  x1i = y8i - y10i;
3002  x2r = y9r + y11r;
3003  x2i = y9i + y11i;
3004  a[20] = x1r - x2i;
3005  a[21] = x1i + x2r;
3006  a[22] = x1r + x2i;
3007  a[23] = x1i - x2r;
3008  x1r = y12r - y14i;
3009  x1i = y12i + y14r;
3010  x0r = y13r + y15i;
3011  x0i = y13i - y15r;
3012  x2r = wn4r * (x0r - x0i);
3013  x2i = wn4r * (x0i + x0r);
3014  a[24] = x1r + x2r;
3015  a[25] = x1i + x2i;
3016  a[26] = x1r - x2r;
3017  a[27] = x1i - x2i;
3018  x1r = y12r + y14i;
3019  x1i = y12i - y14r;
3020  x0r = y13r - y15i;
3021  x0i = y13i + y15r;
3022  x2r = wn4r * (x0r - x0i);
3023  x2i = wn4r * (x0i + x0r);
3024  a[28] = x1r - x2i;
3025  a[29] = x1i + x2r;
3026  a[30] = x1r + x2i;
3027  a[31] = x1i - x2r;
3028 }
3029 
3030 
3031 extern "C" void cftf081(double *a, double *w)
3032 {
3033  double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
3034  y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3035  y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3036 
3037  wn4r = w[1];
3038  x0r = a[0] + a[8];
3039  x0i = a[1] + a[9];
3040  x1r = a[0] - a[8];
3041  x1i = a[1] - a[9];
3042  x2r = a[4] + a[12];
3043  x2i = a[5] + a[13];
3044  x3r = a[4] - a[12];
3045  x3i = a[5] - a[13];
3046  y0r = x0r + x2r;
3047  y0i = x0i + x2i;
3048  y2r = x0r - x2r;
3049  y2i = x0i - x2i;
3050  y1r = x1r - x3i;
3051  y1i = x1i + x3r;
3052  y3r = x1r + x3i;
3053  y3i = x1i - x3r;
3054  x0r = a[2] + a[10];
3055  x0i = a[3] + a[11];
3056  x1r = a[2] - a[10];
3057  x1i = a[3] - a[11];
3058  x2r = a[6] + a[14];
3059  x2i = a[7] + a[15];
3060  x3r = a[6] - a[14];
3061  x3i = a[7] - a[15];
3062  y4r = x0r + x2r;
3063  y4i = x0i + x2i;
3064  y6r = x0r - x2r;
3065  y6i = x0i - x2i;
3066  x0r = x1r - x3i;
3067  x0i = x1i + x3r;
3068  x2r = x1r + x3i;
3069  x2i = x1i - x3r;
3070  y5r = wn4r * (x0r - x0i);
3071  y5i = wn4r * (x0r + x0i);
3072  y7r = wn4r * (x2r - x2i);
3073  y7i = wn4r * (x2r + x2i);
3074  a[8] = y1r + y5r;
3075  a[9] = y1i + y5i;
3076  a[10] = y1r - y5r;
3077  a[11] = y1i - y5i;
3078  a[12] = y3r - y7i;
3079  a[13] = y3i + y7r;
3080  a[14] = y3r + y7i;
3081  a[15] = y3i - y7r;
3082  a[0] = y0r + y4r;
3083  a[1] = y0i + y4i;
3084  a[2] = y0r - y4r;
3085  a[3] = y0i - y4i;
3086  a[4] = y2r - y6i;
3087  a[5] = y2i + y6r;
3088  a[6] = y2r + y6i;
3089  a[7] = y2i - y6r;
3090 }
3091 
3092 
3093 extern "C" void cftf082(double *a, double *w)
3094 {
3095  double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
3096  y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3097  y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3098 
3099  wn4r = w[1];
3100  wk1r = w[2];
3101  wk1i = w[3];
3102  y0r = a[0] - a[9];
3103  y0i = a[1] + a[8];
3104  y1r = a[0] + a[9];
3105  y1i = a[1] - a[8];
3106  x0r = a[4] - a[13];
3107  x0i = a[5] + a[12];
3108  y2r = wn4r * (x0r - x0i);
3109  y2i = wn4r * (x0i + x0r);
3110  x0r = a[4] + a[13];
3111  x0i = a[5] - a[12];
3112  y3r = wn4r * (x0r - x0i);
3113  y3i = wn4r * (x0i + x0r);
3114  x0r = a[2] - a[11];
3115  x0i = a[3] + a[10];
3116  y4r = wk1r * x0r - wk1i * x0i;
3117  y4i = wk1r * x0i + wk1i * x0r;
3118  x0r = a[2] + a[11];
3119  x0i = a[3] - a[10];
3120  y5r = wk1i * x0r - wk1r * x0i;
3121  y5i = wk1i * x0i + wk1r * x0r;
3122  x0r = a[6] - a[15];
3123  x0i = a[7] + a[14];
3124  y6r = wk1i * x0r - wk1r * x0i;
3125  y6i = wk1i * x0i + wk1r * x0r;
3126  x0r = a[6] + a[15];
3127  x0i = a[7] - a[14];
3128  y7r = wk1r * x0r - wk1i * x0i;
3129  y7i = wk1r * x0i + wk1i * x0r;
3130  x0r = y0r + y2r;
3131  x0i = y0i + y2i;
3132  x1r = y4r + y6r;
3133  x1i = y4i + y6i;
3134  a[0] = x0r + x1r;
3135  a[1] = x0i + x1i;
3136  a[2] = x0r - x1r;
3137  a[3] = x0i - x1i;
3138  x0r = y0r - y2r;
3139  x0i = y0i - y2i;
3140  x1r = y4r - y6r;
3141  x1i = y4i - y6i;
3142  a[4] = x0r - x1i;
3143  a[5] = x0i + x1r;
3144  a[6] = x0r + x1i;
3145  a[7] = x0i - x1r;
3146  x0r = y1r - y3i;
3147  x0i = y1i + y3r;
3148  x1r = y5r - y7r;
3149  x1i = y5i - y7i;
3150  a[8] = x0r + x1r;
3151  a[9] = x0i + x1i;
3152  a[10] = x0r - x1r;
3153  a[11] = x0i - x1i;
3154  x0r = y1r + y3i;
3155  x0i = y1i - y3r;
3156  x1r = y5r + y7r;
3157  x1i = y5i + y7i;
3158  a[12] = x0r - x1i;
3159  a[13] = x0i + x1r;
3160  a[14] = x0r + x1i;
3161  a[15] = x0i - x1r;
3162 }
3163 
3164 
3165 extern "C" void cftf040(double *a)
3166 {
3167  double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3168 
3169  x0r = a[0] + a[4];
3170  x0i = a[1] + a[5];
3171  x1r = a[0] - a[4];
3172  x1i = a[1] - a[5];
3173  x2r = a[2] + a[6];
3174  x2i = a[3] + a[7];
3175  x3r = a[2] - a[6];
3176  x3i = a[3] - a[7];
3177  a[0] = x0r + x2r;
3178  a[1] = x0i + x2i;
3179  a[2] = x1r - x3i;
3180  a[3] = x1i + x3r;
3181  a[4] = x0r - x2r;
3182  a[5] = x0i - x2i;
3183  a[6] = x1r + x3i;
3184  a[7] = x1i - x3r;
3185 }
3186 
3187 
3188 extern "C" void cftb040(double *a)
3189 {
3190  double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3191 
3192  x0r = a[0] + a[4];
3193  x0i = a[1] + a[5];
3194  x1r = a[0] - a[4];
3195  x1i = a[1] - a[5];
3196  x2r = a[2] + a[6];
3197  x2i = a[3] + a[7];
3198  x3r = a[2] - a[6];
3199  x3i = a[3] - a[7];
3200  a[0] = x0r + x2r;
3201  a[1] = x0i + x2i;
3202  a[2] = x1r + x3i;
3203  a[3] = x1i - x3r;
3204  a[4] = x0r - x2r;
3205  a[5] = x0i - x2i;
3206  a[6] = x1r - x3i;
3207  a[7] = x1i + x3r;
3208 }
3209 
3210 
3211 extern "C" void cftx020(double *a)
3212 {
3213  double x0r, x0i;
3214 
3215  x0r = a[0] - a[2];
3216  x0i = a[1] - a[3];
3217  a[0] += a[2];
3218  a[1] += a[3];
3219  a[2] = x0r;
3220  a[3] = x0i;
3221 }
3222 
3223 
3224 extern "C" void rftfsub(int n, double *a, int nc, double *c)
3225 {
3226  int j, k, kk, ks, m;
3227  double wkr, wki, xr, xi, yr, yi;
3228 
3229  m = n >> 1;
3230  ks = 2 * nc / m;
3231  kk = 0;
3232  for (j = 2; j < m; j += 2) {
3233  k = n - j;
3234  kk += ks;
3235  wkr = 0.5 - c[nc - kk];
3236  wki = c[kk];
3237  xr = a[j] - a[k];
3238  xi = a[j + 1] + a[k + 1];
3239  yr = wkr * xr - wki * xi;
3240  yi = wkr * xi + wki * xr;
3241  a[j] -= yr;
3242  a[j + 1] -= yi;
3243  a[k] += yr;
3244  a[k + 1] -= yi;
3245  }
3246 }
3247 
3248 
3249 extern "C" void rftbsub(int n, double *a, int nc, double *c)
3250 {
3251  int j, k, kk, ks, m;
3252  double wkr, wki, xr, xi, yr, yi;
3253 
3254  m = n >> 1;
3255  ks = 2 * nc / m;
3256  kk = 0;
3257  for (j = 2; j < m; j += 2) {
3258  k = n - j;
3259  kk += ks;
3260  wkr = 0.5 - c[nc - kk];
3261  wki = c[kk];
3262  xr = a[j] - a[k];
3263  xi = a[j + 1] + a[k + 1];
3264  yr = wkr * xr + wki * xi;
3265  yi = wkr * xi - wki * xr;
3266  a[j] -= yr;
3267  a[j + 1] -= yi;
3268  a[k] += yr;
3269  a[k + 1] -= yi;
3270  }
3271 }
3272 
3273 
3274 extern "C" void dctsub(int n, double *a, int nc, double *c)
3275 {
3276  int j, k, kk, ks, m;
3277  double wkr, wki, xr;
3278 
3279  m = n >> 1;
3280  ks = nc / n;
3281  kk = 0;
3282  for (j = 1; j < m; j++) {
3283  k = n - j;
3284  kk += ks;
3285  wkr = c[kk] - c[nc - kk];
3286  wki = c[kk] + c[nc - kk];
3287  xr = wki * a[j] - wkr * a[k];
3288  a[j] = wkr * a[j] + wki * a[k];
3289  a[k] = xr;
3290  }
3291  a[m] *= c[0];
3292 }
3293 
3294 
3295 extern "C" void dstsub(int n, double *a, int nc, double *c)
3296 {
3297  int j, k, kk, ks, m;
3298  double wkr, wki, xr;
3299 
3300  m = n >> 1;
3301  ks = nc / n;
3302  kk = 0;
3303  for (j = 1; j < m; j++) {
3304  k = n - j;
3305  kk += ks;
3306  wkr = c[kk] - c[nc - kk];
3307  wki = c[kk] + c[nc - kk];
3308  xr = wki * a[k] - wkr * a[j];
3309  a[k] = wkr * a[k] + wki * a[j];
3310  a[j] = xr;
3311  }
3312  a[m] *= c[0];
3313 }
3314