Jamoma API  0.6.0.a19
fftsg3d.cpp
1 /*
2 Fast Fourier/Cosine/Sine Transform
3  dimension :three
4  data length :power of 2
5  decimation :frequency
7  data :inplace
8  table :use
9 functions
10  cdft3d: Complex Discrete Fourier Transform
11  rdft3d: Real Discrete Fourier Transform
12  ddct3d: Discrete Cosine Transform
13  ddst3d: Discrete Sine Transform
14 function prototypes
15  void cdft3d(int, int, int, int, double ***, double *, int *, double *);
16  void rdft3d(int, int, int, int, double ***, double *, int *, double *);
17  void rdft3dsort(int, int, int, int, double ***);
18  void ddct3d(int, int, int, int, double ***, double *, int *, double *);
19  void ddst3d(int, int, int, int, double ***, double *, int *, double *);
20 necessary package
21  fftsg.c : 1D-FFT package
22 macro definitions
24  FFT3D_MAX_THREADS : must be 2^N, default=4
27  FFT3D_MAX_THREADS : must be 2^N, default=4
29
30
31 -------- Complex DFT (Discrete Fourier Transform) --------
32  [definition]
33  <case1>
34  X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
35  x[j1][j2][j3] *
36  exp(2*pi*i*j1*k1/n1) *
37  exp(2*pi*i*j2*k2/n2) *
38  exp(2*pi*i*j3*k3/n3),
39  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
40  <case2>
41  X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
42  x[j1][j2][j3] *
43  exp(-2*pi*i*j1*k1/n1) *
44  exp(-2*pi*i*j2*k2/n2) *
45  exp(-2*pi*i*j3*k3/n3),
46  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
47  (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
48  [usage]
49  <case1>
50  ip[0] = 0; // first time only
51  cdft3d(n1, n2, 2*n3, 1, a, t, ip, w);
52  <case2>
53  ip[0] = 0; // first time only
54  cdft3d(n1, n2, 2*n3, -1, a, t, ip, w);
55  [parameters]
56  n1 :data length (int)
57  n1 >= 1, n1 = power of 2
58  n2 :data length (int)
59  n2 >= 1, n2 = power of 2
60  2*n3 :data length (int)
61  n3 >= 1, n3 = power of 2
62  a[0...n1-1][0...n2-1][0...2*n3-1]
63  :input/output data (double ***)
64  input data
65  a[j1][j2][2*j3] = Re(x[j1][j2][j3]),
66  a[j1][j2][2*j3+1] = Im(x[j1][j2][j3]),
67  0<=j1<n1, 0<=j2<n2, 0<=j3<n3
68  output data
69  a[k1][k2][2*k3] = Re(X[k1][k2][k3]),
70  a[k1][k2][2*k3+1] = Im(X[k1][k2][k3]),
71  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
72  t[0...*]
73  :work area (double *)
74  length of t >= max(8*n1, 8*n2), if single thread,
75  length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS,
77  t is dynamically allocated, if t == NULL.
78  ip[0...*]
79  :work area for bit reversal (int *)
80  length of ip >= 2+sqrt(n)
81  (n = max(n1, n2, n3))
82  ip[0],ip[1] are pointers of the cos/sin table.
83  w[0...*]
84  :cos/sin table (double *)
85  length of w >= max(n1/2, n2/2, n3/2)
86  w[],ip[] are initialized if ip[0] == 0.
87  [remark]
88  Inverse of
89  cdft3d(n1, n2, 2*n3, -1, a, t, ip, w);
90  is
91  cdft3d(n1, n2, 2*n3, 1, a, t, ip, w);
92  for (j1 = 0; j1 <= n1 - 1; j1++) {
93  for (j2 = 0; j2 <= n2 - 1; j2++) {
94  for (j3 = 0; j3 <= 2 * n3 - 1; j3++) {
95  a[j1][j2][j3] *= 1.0 / n1 / n2 / n3;
96  }
97  }
98  }
99  .
100
101
102 -------- Real DFT / Inverse of Real DFT --------
103  [definition]
104  <case1> RDFT
105  R[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
106  a[j1][j2][j3] *
107  cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
108  2*pi*j3*k3/n3),
109  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
110  I[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
111  a[j1][j2][j3] *
112  sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
113  2*pi*j3*k3/n3),
114  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
115  <case2> IRDFT (excluding scale)
116  a[k1][k2][k3] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
117  (R[j1][j2][j3] *
118  cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
119  2*pi*j3*k3/n3) +
120  I[j1][j2][j3] *
121  sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
122  2*pi*j3*k3/n3)),
123  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
124  (notes: R[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = R[k1][k2][k3],
125  I[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = -I[k1][k2][k3],
126  0<=k1<n1, 0<=k2<n2, 0<=k3<n3)
127  [usage]
128  <case1>
129  ip[0] = 0; // first time only
130  rdft3d(n1, n2, n3, 1, a, t, ip, w);
131  <case2>
132  ip[0] = 0; // first time only
133  rdft3d(n1, n2, n3, -1, a, t, ip, w);
134  [parameters]
135  n1 :data length (int)
136  n1 >= 2, n1 = power of 2
137  n2 :data length (int)
138  n2 >= 2, n2 = power of 2
139  n3 :data length (int)
140  n3 >= 2, n3 = power of 2
141  a[0...n1-1][0...n2-1][0...n3-1]
142  :input/output data (double ***)
143  <case1>
144  output data
145  a[k1][k2][2*k3] = R[k1][k2][k3]
146  = R[(n1-k1)%n1][(n2-k2)%n2][n3-k3],
147  a[k1][k2][2*k3+1] = I[k1][k2][k3]
148  = -I[(n1-k1)%n1][(n2-k2)%n2][n3-k3],
149  0<=k1<n1, 0<=k2<n2, 0<k3<n3/2,
150  (n%m : n mod m),
151  a[k1][k2][0] = R[k1][k2][0]
152  = R[(n1-k1)%n1][n2-k2][0],
153  a[k1][k2][1] = I[k1][k2][0]
154  = -I[(n1-k1)%n1][n2-k2][0],
155  a[k1][n2-k2][1] = R[(n1-k1)%n1][k2][n3/2]
156  = R[k1][n2-k2][n3/2],
157  a[k1][n2-k2][0] = -I[(n1-k1)%n1][k2][n3/2]
158  = I[k1][n2-k2][n3/2],
159  0<=k1<n1, 0<k2<n2/2,
160  a[k1][0][0] = R[k1][0][0]
161  = R[n1-k1][0][0],
162  a[k1][0][1] = I[k1][0][0]
163  = -I[n1-k1][0][0],
164  a[k1][n2/2][0] = R[k1][n2/2][0]
165  = R[n1-k1][n2/2][0],
166  a[k1][n2/2][1] = I[k1][n2/2][0]
167  = -I[n1-k1][n2/2][0],
168  a[n1-k1][0][1] = R[k1][0][n3/2]
169  = R[n1-k1][0][n3/2],
170  a[n1-k1][0][0] = -I[k1][0][n3/2]
171  = I[n1-k1][0][n3/2],
172  a[n1-k1][n2/2][1] = R[k1][n2/2][n3/2]
173  = R[n1-k1][n2/2][n3/2],
174  a[n1-k1][n2/2][0] = -I[k1][n2/2][n3/2]
175  = I[n1-k1][n2/2][n3/2],
176  0<k1<n1/2,
177  a[0][0][0] = R[0][0][0],
178  a[0][0][1] = R[0][0][n3/2],
179  a[0][n2/2][0] = R[0][n2/2][0],
180  a[0][n2/2][1] = R[0][n2/2][n3/2],
181  a[n1/2][0][0] = R[n1/2][0][0],
182  a[n1/2][0][1] = R[n1/2][0][n3/2],
183  a[n1/2][n2/2][0] = R[n1/2][n2/2][0],
184  a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2]
185  <case2>
186  input data
187  a[j1][j2][2*j3] = R[j1][j2][j3]
188  = R[(n1-j1)%n1][(n2-j2)%n2][n3-j3],
189  a[j1][j2][2*j3+1] = I[j1][j2][j3]
190  = -I[(n1-j1)%n1][(n2-j2)%n2][n3-j3],
191  0<=j1<n1, 0<=j2<n2, 0<j3<n3/2,
192  a[j1][j2][0] = R[j1][j2][0]
193  = R[(n1-j1)%n1][n2-j2][0],
194  a[j1][j2][1] = I[j1][j2][0]
195  = -I[(n1-j1)%n1][n2-j2][0],
196  a[j1][n2-j2][1] = R[(n1-j1)%n1][j2][n3/2]
197  = R[j1][n2-j2][n3/2],
198  a[j1][n2-j2][0] = -I[(n1-j1)%n1][j2][n3/2]
199  = I[j1][n2-j2][n3/2],
200  0<=j1<n1, 0<j2<n2/2,
201  a[j1][0][0] = R[j1][0][0]
202  = R[n1-j1][0][0],
203  a[j1][0][1] = I[j1][0][0]
204  = -I[n1-j1][0][0],
205  a[j1][n2/2][0] = R[j1][n2/2][0]
206  = R[n1-j1][n2/2][0],
207  a[j1][n2/2][1] = I[j1][n2/2][0]
208  = -I[n1-j1][n2/2][0],
209  a[n1-j1][0][1] = R[j1][0][n3/2]
210  = R[n1-j1][0][n3/2],
211  a[n1-j1][0][0] = -I[j1][0][n3/2]
212  = I[n1-j1][0][n3/2],
213  a[n1-j1][n2/2][1] = R[j1][n2/2][n3/2]
214  = R[n1-j1][n2/2][n3/2],
215  a[n1-j1][n2/2][0] = -I[j1][n2/2][n3/2]
216  = I[n1-j1][n2/2][n3/2],
217  0<j1<n1/2,
218  a[0][0][0] = R[0][0][0],
219  a[0][0][1] = R[0][0][n3/2],
220  a[0][n2/2][0] = R[0][n2/2][0],
221  a[0][n2/2][1] = R[0][n2/2][n3/2],
222  a[n1/2][0][0] = R[n1/2][0][0],
223  a[n1/2][0][1] = R[n1/2][0][n3/2],
224  a[n1/2][n2/2][0] = R[n1/2][n2/2][0],
225  a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2]
226  ---- output ordering ----
227  rdft3d(n1, n2, n3, 1, a, t, ip, w);
228  rdft3dsort(n1, n2, n3, 1, a);
229  // stored data is a[0...n1-1][0...n2-1][0...n3+1]:
230  // a[k1][k2][2*k3] = R[k1][k2][k3],
231  // a[k1][k2][2*k3+1] = I[k1][k2][k3],
232  // 0<=k1<n1, 0<=k2<n2, 0<=k3<=n3/2.
233  // the stored data is larger than the input data!
234  ---- input ordering ----
235  rdft3dsort(n1, n2, n3, -1, a);
236  rdft3d(n1, n2, n3, -1, a, t, ip, w);
237  t[0...*]
238  :work area (double *)
239  length of t >= max(8*n1, 8*n2), if single thread,
240  length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS,
242  t is dynamically allocated, if t == NULL.
243  ip[0...*]
244  :work area for bit reversal (int *)
245  length of ip >= 2+sqrt(n)
246  (n = max(n1, n2, n3/2))
247  ip[0],ip[1] are pointers of the cos/sin table.
248  w[0...*]
249  :cos/sin table (double *)
250  length of w >= max(n1/2, n2/2, n3/4) + n3/4
251  w[],ip[] are initialized if ip[0] == 0.
252  [remark]
253  Inverse of
254  rdft3d(n1, n2, n3, 1, a, t, ip, w);
255  is
256  rdft3d(n1, n2, n3, -1, a, t, ip, w);
257  for (j1 = 0; j1 <= n1 - 1; j1++) {
258  for (j2 = 0; j2 <= n2 - 1; j2++) {
259  for (j3 = 0; j3 <= n3 - 1; j3++) {
260  a[j1][j2][j3] *= 2.0 / n1 / n2 / n3;
261  }
262  }
263  }
264  .
265
266
267 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
268  [definition]
269  <case1> IDCT (excluding scale)
270  C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
271  a[j1][j2][j3] *
272  cos(pi*j1*(k1+1/2)/n1) *
273  cos(pi*j2*(k2+1/2)/n2) *
274  cos(pi*j3*(k3+1/2)/n3),
275  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
276  <case2> DCT
277  C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
278  a[j1][j2][j3] *
279  cos(pi*(j1+1/2)*k1/n1) *
280  cos(pi*(j2+1/2)*k2/n2) *
281  cos(pi*(j3+1/2)*k3/n3),
282  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
283  [usage]
284  <case1>
285  ip[0] = 0; // first time only
286  ddct3d(n1, n2, n3, 1, a, t, ip, w);
287  <case2>
288  ip[0] = 0; // first time only
289  ddct3d(n1, n2, n3, -1, a, t, ip, w);
290  [parameters]
291  n1 :data length (int)
292  n1 >= 2, n1 = power of 2
293  n2 :data length (int)
294  n2 >= 2, n2 = power of 2
295  n3 :data length (int)
296  n3 >= 2, n3 = power of 2
297  a[0...n1-1][0...n2-1][0...n3-1]
298  :input/output data (double ***)
299  output data
300  a[k1][k2][k3] = C[k1][k2][k3],
301  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
302  t[0...*]
303  :work area (double *)
304  length of t >= max(4*n1, 4*n2), if single thread,
305  length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS,
307  t is dynamically allocated, if t == NULL.
308  ip[0...*]
309  :work area for bit reversal (int *)
310  length of ip >= 2+sqrt(n)
311  (n = max(n1/2, n2/2, n3/2))
312  ip[0],ip[1] are pointers of the cos/sin table.
313  w[0...*]
314  :cos/sin table (double *)
315  length of w >= max(n1*3/2, n2*3/2, n3*3/2)
316  w[],ip[] are initialized if ip[0] == 0.
317  [remark]
318  Inverse of
319  ddct3d(n1, n2, n3, -1, a, t, ip, w);
320  is
321  for (j1 = 0; j1 <= n1 - 1; j1++) {
322  for (j2 = 0; j2 <= n2 - 1; j2++) {
323  a[j1][j2][0] *= 0.5;
324  }
325  for (j3 = 0; j3 <= n3 - 1; j3++) {
326  a[j1][0][j3] *= 0.5;
327  }
328  }
329  for (j2 = 0; j2 <= n2 - 1; j2++) {
330  for (j3 = 0; j3 <= n3 - 1; j3++) {
331  a[0][j2][j3] *= 0.5;
332  }
333  }
334  ddct3d(n1, n2, n3, 1, a, t, ip, w);
335  for (j1 = 0; j1 <= n1 - 1; j1++) {
336  for (j2 = 0; j2 <= n2 - 1; j2++) {
337  for (j3 = 0; j3 <= n3 - 1; j3++) {
338  a[j1][j2][j3] *= 8.0 / n1 / n2 / n3;
339  }
340  }
341  }
342  .
343
344
345 -------- DST (Discrete Sine Transform) / Inverse of DST --------
346  [definition]
347  <case1> IDST (excluding scale)
348  S[k1][k2][k3] = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3
349  A[j1][j2][j3] *
350  sin(pi*j1*(k1+1/2)/n1) *
351  sin(pi*j2*(k2+1/2)/n2) *
352  sin(pi*j3*(k3+1/2)/n3),
353  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
354  <case2> DST
355  S[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
356  a[j1][j2][j3] *
357  sin(pi*(j1+1/2)*k1/n1) *
358  sin(pi*(j2+1/2)*k2/n2) *
359  sin(pi*(j3+1/2)*k3/n3),
360  0<k1<=n1, 0<k2<=n2, 0<k3<=n3
361  [usage]
362  <case1>
363  ip[0] = 0; // first time only
364  ddst3d(n1, n2, n3, 1, a, t, ip, w);
365  <case2>
366  ip[0] = 0; // first time only
367  ddst3d(n1, n2, n3, -1, a, t, ip, w);
368  [parameters]
369  n1 :data length (int)
370  n1 >= 2, n1 = power of 2
371  n2 :data length (int)
372  n2 >= 2, n2 = power of 2
373  n3 :data length (int)
374  n3 >= 2, n3 = power of 2
375  a[0...n1-1][0...n2-1][0...n3-1]
376  :input/output data (double ***)
377  <case1>
378  input data
379  a[j1%n1][j2%n2][j3%n3] = A[j1][j2][j3],
380  0<j1<=n1, 0<j2<=n2, 0<j3<=n3,
381  (n%m : n mod m)
382  output data
383  a[k1][k2][k3] = S[k1][k2][k3],
384  0<=k1<n1, 0<=k2<n2, 0<=k3<n3
385  <case2>
386  output data
387  a[k1%n1][k2%n2][k3%n3] = S[k1][k2][k3],
388  0<k1<=n1, 0<k2<=n2, 0<k3<=n3
389  t[0...*]
390  :work area (double *)
391  length of t >= max(4*n1, 4*n2), if single thread,
392  length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS,
394  t is dynamically allocated, if t == NULL.
395  ip[0...*]
396  :work area for bit reversal (int *)
397  length of ip >= 2+sqrt(n)
398  (n = max(n1/2, n2/2, n3/2))
399  ip[0],ip[1] are pointers of the cos/sin table.
400  w[0...*]
401  :cos/sin table (double *)
402  length of w >= max(n1*3/2, n2*3/2, n3*3/2)
403  w[],ip[] are initialized if ip[0] == 0.
404  [remark]
405  Inverse of
406  ddst3d(n1, n2, n3, -1, a, t, ip, w);
407  is
408  for (j1 = 0; j1 <= n1 - 1; j1++) {
409  for (j2 = 0; j2 <= n2 - 1; j2++) {
410  a[j1][j2][0] *= 0.5;
411  }
412  for (j3 = 0; j3 <= n3 - 1; j3++) {
413  a[j1][0][j3] *= 0.5;
414  }
415  }
416  for (j2 = 0; j2 <= n2 - 1; j2++) {
417  for (j3 = 0; j3 <= n3 - 1; j3++) {
418  a[0][j2][j3] *= 0.5;
419  }
420  }
421  ddst3d(n1, n2, n3, 1, a, t, ip, w);
422  for (j1 = 0; j1 <= n1 - 1; j1++) {
423  for (j2 = 0; j2 <= n2 - 1; j2++) {
424  for (j3 = 0; j3 <= n3 - 1; j3++) {
425  a[j1][j2][j3] *= 8.0 / n1 / n2 / n3;
426  }
427  }
428  }
429  .
430 */
431
432
433 #include <stdio.h>
434 #include <stdlib.h>
435 #define fft3d_alloc_error_check(p) { \
436  if ((p) == NULL) { \
437  fprintf(stderr, "fft3d memory allocation error\n"); \
438  exit(1); \
439  } \
440 }
441
442
447 #endif
450 #endif
454  if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \
455  fprintf(stderr, "fft3d thread error\n"); \
456  exit(1); \
457  } \
458 }
460  if (pthread_join(th, NULL) != 0) { \
461  fprintf(stderr, "fft3d thread error\n"); \
462  exit(1); \
463  } \
464 }
466
467
472 #endif
475 #endif
476 #include <windows.h>
479  DWORD thid; \
480  *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \
481  if (*(thp) == 0) { \
482  fprintf(stderr, "fft3d thread error\n"); \
483  exit(1); \
484  } \
485 }
487  WaitForSingleObject(th, INFINITE); \
488  CloseHandle(th); \
489 }
491
492
493 void cdft3d(int n1, int n2, int n3, int isgn, double ***a,
494  double *t, int *ip, double *w)
495 {
496  void makewt(int nw, int *ip, double *w);
497  void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn,
498  double ***a, double *t, int *ip, double *w);
499  void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a,
500  double *t, int *ip, double *w);
502  void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn,
503  double ***a, double *t, int *ip, double *w);
504  void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a,
505  double *t, int *ip, double *w);
507  int n, itnull, nt;
508
509  n = n1;
510  if (n < n2) {
511  n = n2;
512  }
513  n <<= 1;
514  if (n < n3) {
515  n = n3;
516  }
517  if (n > (ip[0] << 2)) {
518  makewt(n >> 2, ip, w);
519  }
520  itnull = 0;
521  if (t == NULL) {
522  itnull = 1;
523  nt = n1;
524  if (nt < n2) {
525  nt = n2;
526  }
527  nt *= 8;
531  if (n3 == 4) {
532  nt >>= 1;
533  } else if (n3 < 4) {
534  nt >>= 2;
535  }
536  t = (double *) malloc(sizeof(double) * nt);
537  fft3d_alloc_error_check(t);
538  }
540  if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
541  xdft3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
542  cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
543  } else
545  {
546  xdft3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
547  cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
548  }
549  if (itnull != 0) {
550  free(t);
551  }
552 }
553
554
555 void rdft3d(int n1, int n2, int n3, int isgn, double ***a,
556  double *t, int *ip, double *w)
557 {
558  void makewt(int nw, int *ip, double *w);
559  void makect(int nc, int *ip, double *c);
560  void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn,
561  double ***a, double *t, int *ip, double *w);
562  void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a,
563  double *t, int *ip, double *w);
564  void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a);
566  void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn,
567  double ***a, double *t, int *ip, double *w);
568  void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a,
569  double *t, int *ip, double *w);
571  int n, nw, nc, itnull, nt;
572
573  n = n1;
574  if (n < n2) {
575  n = n2;
576  }
577  n <<= 1;
578  if (n < n3) {
579  n = n3;
580  }
581  nw = ip[0];
582  if (n > (nw << 2)) {
583  nw = n >> 2;
584  makewt(nw, ip, w);
585  }
586  nc = ip[1];
587  if (n3 > (nc << 2)) {
588  nc = n3 >> 2;
589  makect(nc, ip, w + nw);
590  }
591  itnull = 0;
592  if (t == NULL) {
593  itnull = 1;
594  nt = n1;
595  if (nt < n2) {
596  nt = n2;
597  }
598  nt *= 8;
602  if (n3 == 4) {
603  nt >>= 1;
604  } else if (n3 < 4) {
605  nt >>= 2;
606  }
607  t = (double *) malloc(sizeof(double) * nt);
608  fft3d_alloc_error_check(t);
609  }
611  if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
612  if (isgn < 0) {
613  rdft3d_sub(n1, n2, n3, isgn, a);
614  cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
615  }
616  xdft3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
617  if (isgn >= 0) {
618  cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
619  rdft3d_sub(n1, n2, n3, isgn, a);
620  }
621  } else
623  {
624  if (isgn < 0) {
625  rdft3d_sub(n1, n2, n3, isgn, a);
626  cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
627  }
628  xdft3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
629  if (isgn >= 0) {
630  cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
631  rdft3d_sub(n1, n2, n3, isgn, a);
632  }
633  }
634  if (itnull != 0) {
635  free(t);
636  }
637 }
638
639
640 void rdft3dsort(int n1, int n2, int n3, int isgn, double ***a)
641 {
642  int n1h, n2h, i, j;
643  double x, y;
644
645  n1h = n1 >> 1;
646  n2h = n2 >> 1;
647  if (isgn < 0) {
648  for (i = 0; i < n1; i++) {
649  for (j = n2h + 1; j < n2; j++) {
650  a[i][j][0] = a[i][j][n3 + 1];
651  a[i][j][1] = a[i][j][n3];
652  }
653  }
654  for (i = n1h + 1; i < n1; i++) {
655  a[i][0][0] = a[i][0][n3 + 1];
656  a[i][0][1] = a[i][0][n3];
657  a[i][n2h][0] = a[i][n2h][n3 + 1];
658  a[i][n2h][1] = a[i][n2h][n3];
659  }
660  a[0][0][1] = a[0][0][n3];
661  a[0][n2h][1] = a[0][n2h][n3];
662  a[n1h][0][1] = a[n1h][0][n3];
663  a[n1h][n2h][1] = a[n1h][n2h][n3];
664  } else {
665  for (j = n2h + 1; j < n2; j++) {
666  y = a[0][j][0];
667  x = a[0][j][1];
668  a[0][j][n3] = x;
669  a[0][j][n3 + 1] = y;
670  a[0][n2 - j][n3] = x;
671  a[0][n2 - j][n3 + 1] = -y;
672  a[0][j][0] = a[0][n2 - j][0];
673  a[0][j][1] = -a[0][n2 - j][1];
674  }
675  for (i = 1; i < n1; i++) {
676  for (j = n2h + 1; j < n2; j++) {
677  y = a[i][j][0];
678  x = a[i][j][1];
679  a[i][j][n3] = x;
680  a[i][j][n3 + 1] = y;
681  a[n1 - i][n2 - j][n3] = x;
682  a[n1 - i][n2 - j][n3 + 1] = -y;
683  a[i][j][0] = a[n1 - i][n2 - j][0];
684  a[i][j][1] = -a[n1 - i][n2 - j][1];
685  }
686  }
687  for (i = n1h + 1; i < n1; i++) {
688  y = a[i][0][0];
689  x = a[i][0][1];
690  a[i][0][n3] = x;
691  a[i][0][n3 + 1] = y;
692  a[n1 - i][0][n3] = x;
693  a[n1 - i][0][n3 + 1] = -y;
694  a[i][0][0] = a[n1 - i][0][0];
695  a[i][0][1] = -a[n1 - i][0][1];
696  y = a[i][n2h][0];
697  x = a[i][n2h][1];
698  a[i][n2h][n3] = x;
699  a[i][n2h][n3 + 1] = y;
700  a[n1 - i][n2h][n3] = x;
701  a[n1 - i][n2h][n3 + 1] = -y;
702  a[i][n2h][0] = a[n1 - i][n2h][0];
703  a[i][n2h][1] = -a[n1 - i][n2h][1];
704  }
705  a[0][0][n3] = a[0][0][1];
706  a[0][0][n3 + 1] = 0;
707  a[0][0][1] = 0;
708  a[0][n2h][n3] = a[0][n2h][1];
709  a[0][n2h][n3 + 1] = 0;
710  a[0][n2h][1] = 0;
711  a[n1h][0][n3] = a[n1h][0][1];
712  a[n1h][0][n3 + 1] = 0;
713  a[n1h][0][1] = 0;
714  a[n1h][n2h][n3] = a[n1h][n2h][1];
715  a[n1h][n2h][n3 + 1] = 0;
716  a[n1h][n2h][1] = 0;
717  }
718 }
719
720
721 void ddct3d(int n1, int n2, int n3, int isgn, double ***a,
722  double *t, int *ip, double *w)
723 {
724  void makewt(int nw, int *ip, double *w);
725  void makect(int nc, int *ip, double *c);
726  void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn,
727  double ***a, double *t, int *ip, double *w);
728  void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn,
729  double ***a, double *t, int *ip, double *w);
731  void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn,
732  double ***a, double *t, int *ip, double *w);
733  void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn,
734  double ***a, double *t, int *ip, double *w);
736  int n, nw, nc, itnull, nt;
737
738  n = n1;
739  if (n < n2) {
740  n = n2;
741  }
742  if (n < n3) {
743  n = n3;
744  }
745  nw = ip[0];
746  if (n > (nw << 2)) {
747  nw = n >> 2;
748  makewt(nw, ip, w);
749  }
750  nc = ip[1];
751  if (n > nc) {
752  nc = n;
753  makect(nc, ip, w + nw);
754  }
755  itnull = 0;
756  if (t == NULL) {
757  itnull = 1;
758  nt = n1;
759  if (nt < n2) {
760  nt = n2;
761  }
762  nt *= 4;
766  if (n3 == 2) {
767  nt >>= 1;
768  }
769  t = (double *) malloc(sizeof(double) * nt);
770  fft3d_alloc_error_check(t);
771  }
773  if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
774  ddxt3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
775  ddxt3db_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
776  } else
778  {
779  ddxt3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
780  ddxt3db_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
781  }
782  if (itnull != 0) {
783  free(t);
784  }
785 }
786
787
788 void ddst3d(int n1, int n2, int n3, int isgn, double ***a,
789  double *t, int *ip, double *w)
790 {
791  void makewt(int nw, int *ip, double *w);
792  void makect(int nc, int *ip, double *c);
793  void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn,
794  double ***a, double *t, int *ip, double *w);
795  void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn,
796  double ***a, double *t, int *ip, double *w);
798  void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn,
799  double ***a, double *t, int *ip, double *w);
800  void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn,
801  double ***a, double *t, int *ip, double *w);
803  int n, nw, nc, itnull, nt;
804
805  n = n1;
806  if (n < n2) {
807  n = n2;
808  }
809  if (n < n3) {
810  n = n3;
811  }
812  nw = ip[0];
813  if (n > (nw << 2)) {
814  nw = n >> 2;
815  makewt(nw, ip, w);
816  }
817  nc = ip[1];
818  if (n > nc) {
819  nc = n;
820  makect(nc, ip, w + nw);
821  }
822  itnull = 0;
823  if (t == NULL) {
824  itnull = 1;
825  nt = n1;
826  if (nt < n2) {
827  nt = n2;
828  }
829  nt *= 4;
833  if (n3 == 2) {
834  nt >>= 1;
835  }
836  t = (double *) malloc(sizeof(double) * nt);
837  fft3d_alloc_error_check(t);
838  }
840  if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
841  ddxt3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
842  ddxt3db_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
843  } else
845  {
846  ddxt3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
847  ddxt3db_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
848  }
849  if (itnull != 0) {
850  free(t);
851  }
852 }
853
854
855 /* -------- child routines -------- */
856
857
858 void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn,
859  double ***a, double *t, int *ip, double *w)
860 {
861  void cdft(int n, int isgn, double *a, int *ip, double *w);
862  void rdft(int n, int isgn, double *a, int *ip, double *w);
863  int i, j, k;
864
865  for (i = 0; i < n1; i++) {
866  if (icr == 0) {
867  for (j = 0; j < n2; j++) {
868  cdft(n3, isgn, a[i][j], ip, w);
869  }
870  } else if (isgn >= 0) {
871  for (j = 0; j < n2; j++) {
872  rdft(n3, isgn, a[i][j], ip, w);
873  }
874  }
875  if (n3 > 4) {
876  for (k = 0; k < n3; k += 8) {
877  for (j = 0; j < n2; j++) {
878  t[2 * j] = a[i][j][k];
879  t[2 * j + 1] = a[i][j][k + 1];
880  t[2 * n2 + 2 * j] = a[i][j][k + 2];
881  t[2 * n2 + 2 * j + 1] = a[i][j][k + 3];
882  t[4 * n2 + 2 * j] = a[i][j][k + 4];
883  t[4 * n2 + 2 * j + 1] = a[i][j][k + 5];
884  t[6 * n2 + 2 * j] = a[i][j][k + 6];
885  t[6 * n2 + 2 * j + 1] = a[i][j][k + 7];
886  }
887  cdft(2 * n2, isgn, t, ip, w);
888  cdft(2 * n2, isgn, &t[2 * n2], ip, w);
889  cdft(2 * n2, isgn, &t[4 * n2], ip, w);
890  cdft(2 * n2, isgn, &t[6 * n2], ip, w);
891  for (j = 0; j < n2; j++) {
892  a[i][j][k] = t[2 * j];
893  a[i][j][k + 1] = t[2 * j + 1];
894  a[i][j][k + 2] = t[2 * n2 + 2 * j];
895  a[i][j][k + 3] = t[2 * n2 + 2 * j + 1];
896  a[i][j][k + 4] = t[4 * n2 + 2 * j];
897  a[i][j][k + 5] = t[4 * n2 + 2 * j + 1];
898  a[i][j][k + 6] = t[6 * n2 + 2 * j];
899  a[i][j][k + 7] = t[6 * n2 + 2 * j + 1];
900  }
901  }
902  } else if (n3 == 4) {
903  for (j = 0; j < n2; j++) {
904  t[2 * j] = a[i][j][0];
905  t[2 * j + 1] = a[i][j][1];
906  t[2 * n2 + 2 * j] = a[i][j][2];
907  t[2 * n2 + 2 * j + 1] = a[i][j][3];
908  }
909  cdft(2 * n2, isgn, t, ip, w);
910  cdft(2 * n2, isgn, &t[2 * n2], ip, w);
911  for (j = 0; j < n2; j++) {
912  a[i][j][0] = t[2 * j];
913  a[i][j][1] = t[2 * j + 1];
914  a[i][j][2] = t[2 * n2 + 2 * j];
915  a[i][j][3] = t[2 * n2 + 2 * j + 1];
916  }
917  } else if (n3 == 2) {
918  for (j = 0; j < n2; j++) {
919  t[2 * j] = a[i][j][0];
920  t[2 * j + 1] = a[i][j][1];
921  }
922  cdft(2 * n2, isgn, t, ip, w);
923  for (j = 0; j < n2; j++) {
924  a[i][j][0] = t[2 * j];
925  a[i][j][1] = t[2 * j + 1];
926  }
927  }
928  if (icr != 0 && isgn < 0) {
929  for (j = 0; j < n2; j++) {
930  rdft(n3, isgn, a[i][j], ip, w);
931  }
932  }
933  }
934 }
935
936
937 void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a,
938  double *t, int *ip, double *w)
939 {
940  void cdft(int n, int isgn, double *a, int *ip, double *w);
941  int i, j, k;
942
943  if (n3 > 4) {
944  for (j = 0; j < n2; j++) {
945  for (k = 0; k < n3; k += 8) {
946  for (i = 0; i < n1; i++) {
947  t[2 * i] = a[i][j][k];
948  t[2 * i + 1] = a[i][j][k + 1];
949  t[2 * n1 + 2 * i] = a[i][j][k + 2];
950  t[2 * n1 + 2 * i + 1] = a[i][j][k + 3];
951  t[4 * n1 + 2 * i] = a[i][j][k + 4];
952  t[4 * n1 + 2 * i + 1] = a[i][j][k + 5];
953  t[6 * n1 + 2 * i] = a[i][j][k + 6];
954  t[6 * n1 + 2 * i + 1] = a[i][j][k + 7];
955  }
956  cdft(2 * n1, isgn, t, ip, w);
957  cdft(2 * n1, isgn, &t[2 * n1], ip, w);
958  cdft(2 * n1, isgn, &t[4 * n1], ip, w);
959  cdft(2 * n1, isgn, &t[6 * n1], ip, w);
960  for (i = 0; i < n1; i++) {
961  a[i][j][k] = t[2 * i];
962  a[i][j][k + 1] = t[2 * i + 1];
963  a[i][j][k + 2] = t[2 * n1 + 2 * i];
964  a[i][j][k + 3] = t[2 * n1 + 2 * i + 1];
965  a[i][j][k + 4] = t[4 * n1 + 2 * i];
966  a[i][j][k + 5] = t[4 * n1 + 2 * i + 1];
967  a[i][j][k + 6] = t[6 * n1 + 2 * i];
968  a[i][j][k + 7] = t[6 * n1 + 2 * i + 1];
969  }
970  }
971  }
972  } else if (n3 == 4) {
973  for (j = 0; j < n2; j++) {
974  for (i = 0; i < n1; i++) {
975  t[2 * i] = a[i][j][0];
976  t[2 * i + 1] = a[i][j][1];
977  t[2 * n1 + 2 * i] = a[i][j][2];
978  t[2 * n1 + 2 * i + 1] = a[i][j][3];
979  }
980  cdft(2 * n1, isgn, t, ip, w);
981  cdft(2 * n1, isgn, &t[2 * n1], ip, w);
982  for (i = 0; i < n1; i++) {
983  a[i][j][0] = t[2 * i];
984  a[i][j][1] = t[2 * i + 1];
985  a[i][j][2] = t[2 * n1 + 2 * i];
986  a[i][j][3] = t[2 * n1 + 2 * i + 1];
987  }
988  }
989  } else if (n3 == 2) {
990  for (j = 0; j < n2; j++) {
991  for (i = 0; i < n1; i++) {
992  t[2 * i] = a[i][j][0];
993  t[2 * i + 1] = a[i][j][1];
994  }
995  cdft(2 * n1, isgn, t, ip, w);
996  for (i = 0; i < n1; i++) {
997  a[i][j][0] = t[2 * i];
998  a[i][j][1] = t[2 * i + 1];
999  }
1000  }
1001  }
1002 }
1003
1004
1005 void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a)
1006 {
1007  int n1h, n2h, i, j, k, l;
1008  double xi;
1009
1010  n1h = n1 >> 1;
1011  n2h = n2 >> 1;
1012  if (isgn < 0) {
1013  for (i = 1; i < n1h; i++) {
1014  j = n1 - i;
1015  xi = a[i][0][0] - a[j][0][0];
1016  a[i][0][0] += a[j][0][0];
1017  a[j][0][0] = xi;
1018  xi = a[j][0][1] - a[i][0][1];
1019  a[i][0][1] += a[j][0][1];
1020  a[j][0][1] = xi;
1021  xi = a[i][n2h][0] - a[j][n2h][0];
1022  a[i][n2h][0] += a[j][n2h][0];
1023  a[j][n2h][0] = xi;
1024  xi = a[j][n2h][1] - a[i][n2h][1];
1025  a[i][n2h][1] += a[j][n2h][1];
1026  a[j][n2h][1] = xi;
1027  for (k = 1; k < n2h; k++) {
1028  l = n2 - k;
1029  xi = a[i][k][0] - a[j][l][0];
1030  a[i][k][0] += a[j][l][0];
1031  a[j][l][0] = xi;
1032  xi = a[j][l][1] - a[i][k][1];
1033  a[i][k][1] += a[j][l][1];
1034  a[j][l][1] = xi;
1035  xi = a[j][k][0] - a[i][l][0];
1036  a[j][k][0] += a[i][l][0];
1037  a[i][l][0] = xi;
1038  xi = a[i][l][1] - a[j][k][1];
1039  a[j][k][1] += a[i][l][1];
1040  a[i][l][1] = xi;
1041  }
1042  }
1043  for (k = 1; k < n2h; k++) {
1044  l = n2 - k;
1045  xi = a[0][k][0] - a[0][l][0];
1046  a[0][k][0] += a[0][l][0];
1047  a[0][l][0] = xi;
1048  xi = a[0][l][1] - a[0][k][1];
1049  a[0][k][1] += a[0][l][1];
1050  a[0][l][1] = xi;
1051  xi = a[n1h][k][0] - a[n1h][l][0];
1052  a[n1h][k][0] += a[n1h][l][0];
1053  a[n1h][l][0] = xi;
1054  xi = a[n1h][l][1] - a[n1h][k][1];
1055  a[n1h][k][1] += a[n1h][l][1];
1056  a[n1h][l][1] = xi;
1057  }
1058  } else {
1059  for (i = 1; i < n1h; i++) {
1060  j = n1 - i;
1061  a[j][0][0] = 0.5 * (a[i][0][0] - a[j][0][0]);
1062  a[i][0][0] -= a[j][0][0];
1063  a[j][0][1] = 0.5 * (a[i][0][1] + a[j][0][1]);
1064  a[i][0][1] -= a[j][0][1];
1065  a[j][n2h][0] = 0.5 * (a[i][n2h][0] - a[j][n2h][0]);
1066  a[i][n2h][0] -= a[j][n2h][0];
1067  a[j][n2h][1] = 0.5 * (a[i][n2h][1] + a[j][n2h][1]);
1068  a[i][n2h][1] -= a[j][n2h][1];
1069  for (k = 1; k < n2h; k++) {
1070  l = n2 - k;
1071  a[j][l][0] = 0.5 * (a[i][k][0] - a[j][l][0]);
1072  a[i][k][0] -= a[j][l][0];
1073  a[j][l][1] = 0.5 * (a[i][k][1] + a[j][l][1]);
1074  a[i][k][1] -= a[j][l][1];
1075  a[i][l][0] = 0.5 * (a[j][k][0] - a[i][l][0]);
1076  a[j][k][0] -= a[i][l][0];
1077  a[i][l][1] = 0.5 * (a[j][k][1] + a[i][l][1]);
1078  a[j][k][1] -= a[i][l][1];
1079  }
1080  }
1081  for (k = 1; k < n2h; k++) {
1082  l = n2 - k;
1083  a[0][l][0] = 0.5 * (a[0][k][0] - a[0][l][0]);
1084  a[0][k][0] -= a[0][l][0];
1085  a[0][l][1] = 0.5 * (a[0][k][1] + a[0][l][1]);
1086  a[0][k][1] -= a[0][l][1];
1087  a[n1h][l][0] = 0.5 * (a[n1h][k][0] - a[n1h][l][0]);
1088  a[n1h][k][0] -= a[n1h][l][0];
1089  a[n1h][l][1] = 0.5 * (a[n1h][k][1] + a[n1h][l][1]);
1090  a[n1h][k][1] -= a[n1h][l][1];
1091  }
1092  }
1093 }
1094
1095
1096 void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn,
1097  double ***a, double *t, int *ip, double *w)
1098 {
1099  void ddct(int n, int isgn, double *a, int *ip, double *w);
1100  void ddst(int n, int isgn, double *a, int *ip, double *w);
1101  int i, j, k;
1102
1103  for (i = 0; i < n1; i++) {
1104  if (ics == 0) {
1105  for (j = 0; j < n2; j++) {
1106  ddct(n3, isgn, a[i][j], ip, w);
1107  }
1108  } else {
1109  for (j = 0; j < n2; j++) {
1110  ddst(n3, isgn, a[i][j], ip, w);
1111  }
1112  }
1113  if (n3 > 2) {
1114  for (k = 0; k < n3; k += 4) {
1115  for (j = 0; j < n2; j++) {
1116  t[j] = a[i][j][k];
1117  t[n2 + j] = a[i][j][k + 1];
1118  t[2 * n2 + j] = a[i][j][k + 2];
1119  t[3 * n2 + j] = a[i][j][k + 3];
1120  }
1121  if (ics == 0) {
1122  ddct(n2, isgn, t, ip, w);
1123  ddct(n2, isgn, &t[n2], ip, w);
1124  ddct(n2, isgn, &t[2 * n2], ip, w);
1125  ddct(n2, isgn, &t[3 * n2], ip, w);
1126  } else {
1127  ddst(n2, isgn, t, ip, w);
1128  ddst(n2, isgn, &t[n2], ip, w);
1129  ddst(n2, isgn, &t[2 * n2], ip, w);
1130  ddst(n2, isgn, &t[3 * n2], ip, w);
1131  }
1132  for (j = 0; j < n2; j++) {
1133  a[i][j][k] = t[j];
1134  a[i][j][k + 1] = t[n2 + j];
1135  a[i][j][k + 2] = t[2 * n2 + j];
1136  a[i][j][k + 3] = t[3 * n2 + j];
1137  }
1138  }
1139  } else if (n3 == 2) {
1140  for (j = 0; j < n2; j++) {
1141  t[j] = a[i][j][0];
1142  t[n2 + j] = a[i][j][1];
1143  }
1144  if (ics == 0) {
1145  ddct(n2, isgn, t, ip, w);
1146  ddct(n2, isgn, &t[n2], ip, w);
1147  } else {
1148  ddst(n2, isgn, t, ip, w);
1149  ddst(n2, isgn, &t[n2], ip, w);
1150  }
1151  for (j = 0; j < n2; j++) {
1152  a[i][j][0] = t[j];
1153  a[i][j][1] = t[n2 + j];
1154  }
1155  }
1156  }
1157 }
1158
1159
1160 void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn,
1161  double ***a, double *t, int *ip, double *w)
1162 {
1163  void ddct(int n, int isgn, double *a, int *ip, double *w);
1164  void ddst(int n, int isgn, double *a, int *ip, double *w);
1165  int i, j, k;
1166
1167  if (n3 > 2) {
1168  for (j = 0; j < n2; j++) {
1169  for (k = 0; k < n3; k += 4) {
1170  for (i = 0; i < n1; i++) {
1171  t[i] = a[i][j][k];
1172  t[n1 + i] = a[i][j][k + 1];
1173  t[2 * n1 + i] = a[i][j][k + 2];
1174  t[3 * n1 + i] = a[i][j][k + 3];
1175  }
1176  if (ics == 0) {
1177  ddct(n1, isgn, t, ip, w);
1178  ddct(n1, isgn, &t[n1], ip, w);
1179  ddct(n1, isgn, &t[2 * n1], ip, w);
1180  ddct(n1, isgn, &t[3 * n1], ip, w);
1181  } else {
1182  ddst(n1, isgn, t, ip, w);
1183  ddst(n1, isgn, &t[n1], ip, w);
1184  ddst(n1, isgn, &t[2 * n1], ip, w);
1185  ddst(n1, isgn, &t[3 * n1], ip, w);
1186  }
1187  for (i = 0; i < n1; i++) {
1188  a[i][j][k] = t[i];
1189  a[i][j][k + 1] = t[n1 + i];
1190  a[i][j][k + 2] = t[2 * n1 + i];
1191  a[i][j][k + 3] = t[3 * n1 + i];
1192  }
1193  }
1194  }
1195  } else if (n3 == 2) {
1196  for (j = 0; j < n2; j++) {
1197  for (i = 0; i < n1; i++) {
1198  t[i] = a[i][j][0];
1199  t[n1 + i] = a[i][j][1];
1200  }
1201  if (ics == 0) {
1202  ddct(n1, isgn, t, ip, w);
1203  ddct(n1, isgn, &t[n1], ip, w);
1204  } else {
1205  ddst(n1, isgn, t, ip, w);
1206  ddst(n1, isgn, &t[n1], ip, w);
1207  }
1208  for (i = 0; i < n1; i++) {
1209  a[i][j][0] = t[i];
1210  a[i][j][1] = t[n1 + i];
1211  }
1212  }
1213  }
1214 }
1215
1216
1218 struct fft3d_arg_st {
1220  int n0;
1221  int n1;
1222  int n2;
1223  int n3;
1224  int ic;
1225  int isgn;
1226  double ***a;
1227  double *t;
1228  int *ip;
1229  double *w;
1230 };
1231 typedef struct fft3d_arg_st fft3d_arg_t;
1232
1233
1234 void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn,
1235  double ***a, double *t, int *ip, double *w)
1236 {
1237  void *xdft3da_th(void *p);
1241
1243  if (nthread > n1) {
1245  }
1246  nt = 8 * n2;
1247  if (n3 == 4) {
1248  nt >>= 1;
1249  } else if (n3 < 4) {
1250  nt >>= 2;
1251  }
1252  for (i = 0; i < nthread; i++) {
1254  ag[i].n0 = i;
1255  ag[i].n1 = n1;
1256  ag[i].n2 = n2;
1257  ag[i].n3 = n3;
1258  ag[i].ic = icr;
1259  ag[i].isgn = isgn;
1260  ag[i].a = a;
1261  ag[i].t = &t[nt * i];
1262  ag[i].ip = ip;
1263  ag[i].w = w;
1265  }
1266  for (i = 0; i < nthread; i++) {
1268  }
1269 }
1270
1271
1272 void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a,
1273  double *t, int *ip, double *w)
1274 {
1275  void *cdft3db_th(void *p);
1279
1281  if (nthread > n2) {
1283  }
1284  nt = 8 * n1;
1285  if (n3 == 4) {
1286  nt >>= 1;
1287  } else if (n3 < 4) {
1288  nt >>= 2;
1289  }
1290  for (i = 0; i < nthread; i++) {
1292  ag[i].n0 = i;
1293  ag[i].n1 = n1;
1294  ag[i].n2 = n2;
1295  ag[i].n3 = n3;
1296  ag[i].isgn = isgn;
1297  ag[i].a = a;
1298  ag[i].t = &t[nt * i];
1299  ag[i].ip = ip;
1300  ag[i].w = w;
1302  }
1303  for (i = 0; i < nthread; i++) {
1305  }
1306 }
1307
1308
1309 void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn,
1310  double ***a, double *t, int *ip, double *w)
1311 {
1312  void *ddxt3da_th(void *p);
1316
1318  if (nthread > n1) {
1320  }
1321  nt = 4 * n2;
1322  if (n3 == 2) {
1323  nt >>= 1;
1324  }
1325  for (i = 0; i < nthread; i++) {
1327  ag[i].n0 = i;
1328  ag[i].n1 = n1;
1329  ag[i].n2 = n2;
1330  ag[i].n3 = n3;
1331  ag[i].ic = ics;
1332  ag[i].isgn = isgn;
1333  ag[i].a = a;
1334  ag[i].t = &t[nt * i];
1335  ag[i].ip = ip;
1336  ag[i].w = w;
1338  }
1339  for (i = 0; i < nthread; i++) {
1341  }
1342 }
1343
1344
1345 void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn,
1346  double ***a, double *t, int *ip, double *w)
1347 {
1348  void *ddxt3db_th(void *p);
1352
1354  if (nthread > n2) {
1356  }
1357  nt = 4 * n1;
1358  if (n3 == 2) {
1359  nt >>= 1;
1360  }
1361  for (i = 0; i < nthread; i++) {
1363  ag[i].n0 = i;
1364  ag[i].n1 = n1;
1365  ag[i].n2 = n2;
1366  ag[i].n3 = n3;
1367  ag[i].ic = ics;
1368  ag[i].isgn = isgn;
1369  ag[i].a = a;
1370  ag[i].t = &t[nt * i];
1371  ag[i].ip = ip;
1372  ag[i].w = w;
1374  }
1375  for (i = 0; i < nthread; i++) {
1377  }
1378 }
1379
1380
1381 void *xdft3da_th(void *p)
1382 {
1383  void cdft(int n, int isgn, double *a, int *ip, double *w);
1384  void rdft(int n, int isgn, double *a, int *ip, double *w);
1385  int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k;
1386  double ***a, *t, *w;
1387
1389  n0 = ((fft3d_arg_t *) p)->n0;
1390  n1 = ((fft3d_arg_t *) p)->n1;
1391  n2 = ((fft3d_arg_t *) p)->n2;
1392  n3 = ((fft3d_arg_t *) p)->n3;
1393  icr = ((fft3d_arg_t *) p)->ic;
1394  isgn = ((fft3d_arg_t *) p)->isgn;
1395  a = ((fft3d_arg_t *) p)->a;
1396  t = ((fft3d_arg_t *) p)->t;
1397  ip = ((fft3d_arg_t *) p)->ip;
1398  w = ((fft3d_arg_t *) p)->w;
1399  for (i = n0; i < n1; i += nthread) {
1400  if (icr == 0) {
1401  for (j = 0; j < n2; j++) {
1402  cdft(n3, isgn, a[i][j], ip, w);
1403  }
1404  } else if (isgn >= 0) {
1405  for (j = 0; j < n2; j++) {
1406  rdft(n3, isgn, a[i][j], ip, w);
1407  }
1408  }
1409  if (n3 > 4) {
1410  for (k = 0; k < n3; k += 8) {
1411  for (j = 0; j < n2; j++) {
1412  t[2 * j] = a[i][j][k];
1413  t[2 * j + 1] = a[i][j][k + 1];
1414  t[2 * n2 + 2 * j] = a[i][j][k + 2];
1415  t[2 * n2 + 2 * j + 1] = a[i][j][k + 3];
1416  t[4 * n2 + 2 * j] = a[i][j][k + 4];
1417  t[4 * n2 + 2 * j + 1] = a[i][j][k + 5];
1418  t[6 * n2 + 2 * j] = a[i][j][k + 6];
1419  t[6 * n2 + 2 * j + 1] = a[i][j][k + 7];
1420  }
1421  cdft(2 * n2, isgn, t, ip, w);
1422  cdft(2 * n2, isgn, &t[2 * n2], ip, w);
1423  cdft(2 * n2, isgn, &t[4 * n2], ip, w);
1424  cdft(2 * n2, isgn, &t[6 * n2], ip, w);
1425  for (j = 0; j < n2; j++) {
1426  a[i][j][k] = t[2 * j];
1427  a[i][j][k + 1] = t[2 * j + 1];
1428  a[i][j][k + 2] = t[2 * n2 + 2 * j];
1429  a[i][j][k + 3] = t[2 * n2 + 2 * j + 1];
1430  a[i][j][k + 4] = t[4 * n2 + 2 * j];
1431  a[i][j][k + 5] = t[4 * n2 + 2 * j + 1];
1432  a[i][j][k + 6] = t[6 * n2 + 2 * j];
1433  a[i][j][k + 7] = t[6 * n2 + 2 * j + 1];
1434  }
1435  }
1436  } else if (n3 == 4) {
1437  for (j = 0; j < n2; j++) {
1438  t[2 * j] = a[i][j][0];
1439  t[2 * j + 1] = a[i][j][1];
1440  t[2 * n2 + 2 * j] = a[i][j][2];
1441  t[2 * n2 + 2 * j + 1] = a[i][j][3];
1442  }
1443  cdft(2 * n2, isgn, t, ip, w);
1444  cdft(2 * n2, isgn, &t[2 * n2], ip, w);
1445  for (j = 0; j < n2; j++) {
1446  a[i][j][0] = t[2 * j];
1447  a[i][j][1] = t[2 * j + 1];
1448  a[i][j][2] = t[2 * n2 + 2 * j];
1449  a[i][j][3] = t[2 * n2 + 2 * j + 1];
1450  }
1451  } else if (n3 == 2) {
1452  for (j = 0; j < n2; j++) {
1453  t[2 * j] = a[i][j][0];
1454  t[2 * j + 1] = a[i][j][1];
1455  }
1456  cdft(2 * n2, isgn, t, ip, w);
1457  for (j = 0; j < n2; j++) {
1458  a[i][j][0] = t[2 * j];
1459  a[i][j][1] = t[2 * j + 1];
1460  }
1461  }
1462  if (icr != 0 && isgn < 0) {
1463  for (j = 0; j < n2; j++) {
1464  rdft(n3, isgn, a[i][j], ip, w);
1465  }
1466  }
1467  }
1468  return (void *) 0;
1469 }
1470
1471
1472 void *cdft3db_th(void *p)
1473 {
1474  void cdft(int n, int isgn, double *a, int *ip, double *w);
1475  int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k;
1476  double ***a, *t, *w;
1477
1479  n0 = ((fft3d_arg_t *) p)->n0;
1480  n1 = ((fft3d_arg_t *) p)->n1;
1481  n2 = ((fft3d_arg_t *) p)->n2;
1482  n3 = ((fft3d_arg_t *) p)->n3;
1483  isgn = ((fft3d_arg_t *) p)->isgn;
1484  a = ((fft3d_arg_t *) p)->a;
1485  t = ((fft3d_arg_t *) p)->t;
1486  ip = ((fft3d_arg_t *) p)->ip;
1487  w = ((fft3d_arg_t *) p)->w;
1488  if (n3 > 4) {
1489  for (j = n0; j < n2; j += nthread) {
1490  for (k = 0; k < n3; k += 8) {
1491  for (i = 0; i < n1; i++) {
1492  t[2 * i] = a[i][j][k];
1493  t[2 * i + 1] = a[i][j][k + 1];
1494  t[2 * n1 + 2 * i] = a[i][j][k + 2];
1495  t[2 * n1 + 2 * i + 1] = a[i][j][k + 3];
1496  t[4 * n1 + 2 * i] = a[i][j][k + 4];
1497  t[4 * n1 + 2 * i + 1] = a[i][j][k + 5];
1498  t[6 * n1 + 2 * i] = a[i][j][k + 6];
1499  t[6 * n1 + 2 * i + 1] = a[i][j][k + 7];
1500  }
1501  cdft(2 * n1, isgn, t, ip, w);
1502  cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1503  cdft(2 * n1, isgn, &t[4 * n1], ip, w);
1504  cdft(2 * n1, isgn, &t[6 * n1], ip, w);
1505  for (i = 0; i < n1; i++) {
1506  a[i][j][k] = t[2 * i];
1507  a[i][j][k + 1] = t[2 * i + 1];
1508  a[i][j][k + 2] = t[2 * n1 + 2 * i];
1509  a[i][j][k + 3] = t[2 * n1 + 2 * i + 1];
1510  a[i][j][k + 4] = t[4 * n1 + 2 * i];
1511  a[i][j][k + 5] = t[4 * n1 + 2 * i + 1];
1512  a[i][j][k + 6] = t[6 * n1 + 2 * i];
1513  a[i][j][k + 7] = t[6 * n1 + 2 * i + 1];
1514  }
1515  }
1516  }
1517  } else if (n3 == 4) {
1518  for (j = n0; j < n2; j += nthread) {
1519  for (i = 0; i < n1; i++) {
1520  t[2 * i] = a[i][j][0];
1521  t[2 * i + 1] = a[i][j][1];
1522  t[2 * n1 + 2 * i] = a[i][j][2];
1523  t[2 * n1 + 2 * i + 1] = a[i][j][3];
1524  }
1525  cdft(2 * n1, isgn, t, ip, w);
1526  cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1527  for (i = 0; i < n1; i++) {
1528  a[i][j][0] = t[2 * i];
1529  a[i][j][1] = t[2 * i + 1];
1530  a[i][j][2] = t[2 * n1 + 2 * i];
1531  a[i][j][3] = t[2 * n1 + 2 * i + 1];
1532  }
1533  }
1534  } else if (n3 == 2) {
1535  for (j = n0; j < n2; j += nthread) {
1536  for (i = 0; i < n1; i++) {
1537  t[2 * i] = a[i][j][0];
1538  t[2 * i + 1] = a[i][j][1];
1539  }
1540  cdft(2 * n1, isgn, t, ip, w);
1541  for (i = 0; i < n1; i++) {
1542  a[i][j][0] = t[2 * i];
1543  a[i][j][1] = t[2 * i + 1];
1544  }
1545  }
1546  }
1547  return (void *) 0;
1548 }
1549
1550
1551 void *ddxt3da_th(void *p)
1552 {
1553  void ddct(int n, int isgn, double *a, int *ip, double *w);
1554  void ddst(int n, int isgn, double *a, int *ip, double *w);
1555  int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k;
1556  double ***a, *t, *w;
1557
1559  n0 = ((fft3d_arg_t *) p)->n0;
1560  n1 = ((fft3d_arg_t *) p)->n1;
1561  n2 = ((fft3d_arg_t *) p)->n2;
1562  n3 = ((fft3d_arg_t *) p)->n3;
1563  ics = ((fft3d_arg_t *) p)->ic;
1564  isgn = ((fft3d_arg_t *) p)->isgn;
1565  a = ((fft3d_arg_t *) p)->a;
1566  t = ((fft3d_arg_t *) p)->t;
1567  ip = ((fft3d_arg_t *) p)->ip;
1568  w = ((fft3d_arg_t *) p)->w;
1569  for (i = n0; i < n1; i += nthread) {
1570  if (ics == 0) {
1571  for (j = 0; j < n2; j++) {
1572  ddct(n3, isgn, a[i][j], ip, w);
1573  }
1574  } else {
1575  for (j = 0; j < n2; j++) {
1576  ddst(n3, isgn, a[i][j], ip, w);
1577  }
1578  }
1579  if (n3 > 2) {
1580  for (k = 0; k < n3; k += 4) {
1581  for (j = 0; j < n2; j++) {
1582  t[j] = a[i][j][k];
1583  t[n2 + j] = a[i][j][k + 1];
1584  t[2 * n2 + j] = a[i][j][k + 2];
1585  t[3 * n2 + j] = a[i][j][k + 3];
1586  }
1587  if (ics == 0) {
1588  ddct(n2, isgn, t, ip, w);
1589  ddct(n2, isgn, &t[n2], ip, w);
1590  ddct(n2, isgn, &t[2 * n2], ip, w);
1591  ddct(n2, isgn, &t[3 * n2], ip, w);
1592  } else {
1593  ddst(n2, isgn, t, ip, w);
1594  ddst(n2, isgn, &t[n2], ip, w);
1595  ddst(n2, isgn, &t[2 * n2], ip, w);
1596  ddst(n2, isgn, &t[3 * n2], ip, w);
1597  }
1598  for (j = 0; j < n2; j++) {
1599  a[i][j][k] = t[j];
1600  a[i][j][k + 1] = t[n2 + j];
1601  a[i][j][k + 2] = t[2 * n2 + j];
1602  a[i][j][k + 3] = t[3 * n2 + j];
1603  }
1604  }
1605  } else if (n3 == 2) {
1606  for (j = 0; j < n2; j++) {
1607  t[j] = a[i][j][0];
1608  t[n2 + j] = a[i][j][1];
1609  }
1610  if (ics == 0) {
1611  ddct(n2, isgn, t, ip, w);
1612  ddct(n2, isgn, &t[n2], ip, w);
1613  } else {
1614  ddst(n2, isgn, t, ip, w);
1615  ddst(n2, isgn, &t[n2], ip, w);
1616  }
1617  for (j = 0; j < n2; j++) {
1618  a[i][j][0] = t[j];
1619  a[i][j][1] = t[n2 + j];
1620  }
1621  }
1622  }
1623  return (void *) 0;
1624 }
1625
1626
1627 void *ddxt3db_th(void *p)
1628 {
1629  void ddct(int n, int isgn, double *a, int *ip, double *w);
1630  void ddst(int n, int isgn, double *a, int *ip, double *w);
1631  int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k;
1632  double ***a, *t, *w;
1633
1635  n0 = ((fft3d_arg_t *) p)->n0;
1636  n1 = ((fft3d_arg_t *) p)->n1;
1637  n2 = ((fft3d_arg_t *) p)->n2;
1638  n3 = ((fft3d_arg_t *) p)->n3;
1639  ics = ((fft3d_arg_t *) p)->ic;
1640  isgn = ((fft3d_arg_t *) p)->isgn;
1641  a = ((fft3d_arg_t *) p)->a;
1642  t = ((fft3d_arg_t *) p)->t;
1643  ip = ((fft3d_arg_t *) p)->ip;
1644  w = ((fft3d_arg_t *) p)->w;
1645  if (n3 > 2) {
1646  for (j = n0; j < n2; j += nthread) {
1647  for (k = 0; k < n3; k += 4) {
1648  for (i = 0; i < n1; i++) {
1649  t[i] = a[i][j][k];
1650  t[n1 + i] = a[i][j][k + 1];
1651  t[2 * n1 + i] = a[i][j][k + 2];
1652  t[3 * n1 + i] = a[i][j][k + 3];
1653  }
1654  if (ics == 0) {
1655  ddct(n1, isgn, t, ip, w);
1656  ddct(n1, isgn, &t[n1], ip, w);
1657  ddct(n1, isgn, &t[2 * n1], ip, w);
1658  ddct(n1, isgn, &t[3 * n1], ip, w);
1659  } else {
1660  ddst(n1, isgn, t, ip, w);
1661  ddst(n1, isgn, &t[n1], ip, w);
1662  ddst(n1, isgn, &t[2 * n1], ip, w);
1663  ddst(n1, isgn, &t[3 * n1], ip, w);
1664  }
1665  for (i = 0; i < n1; i++) {
1666  a[i][j][k] = t[i];
1667  a[i][j][k + 1] = t[n1 + i];
1668  a[i][j][k + 2] = t[2 * n1 + i];
1669  a[i][j][k + 3] = t[3 * n1 + i];
1670  }
1671  }
1672  }
1673  } else if (n3 == 2) {
1674  for (j = n0; j < n2; j += nthread) {
1675  for (i = 0; i < n1; i++) {
1676  t[i] = a[i][j][0];
1677  t[n1 + i] = a[i][j][1];
1678  }
1679  if (ics == 0) {
1680  ddct(n1, isgn, t, ip, w);
1681  ddct(n1, isgn, &t[n1], ip, w);
1682  } else {
1683  ddst(n1, isgn, t, ip, w);
1684  ddst(n1, isgn, &t[n1], ip, w);
1685  }
1686  for (i = 0; i < n1; i++) {
1687  a[i][j][0] = t[i];
1688  a[i][j][1] = t[n1 + i];
1689  }
1690  }
1691  }
1692  return (void *) 0;
1693 }