435 #define fft3d_alloc_error_check(p) { \
437 fprintf(stderr, "fft3d memory allocation error\n"); \
443 #ifdef USE_FFT3D_PTHREADS
444 #define USE_FFT3D_THREADS
445 #ifndef FFT3D_MAX_THREADS
446 #define FFT3D_MAX_THREADS 4
448 #ifndef FFT3D_THREADS_BEGIN_N
449 #define FFT3D_THREADS_BEGIN_N 65536
452 #define fft3d_thread_t pthread_t
453 #define fft3d_thread_create(thp,func,argp) { \
454 if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \
455 fprintf(stderr, "fft3d thread error\n"); \
459 #define fft3d_thread_wait(th) { \
460 if (pthread_join(th, NULL) != 0) { \
461 fprintf(stderr, "fft3d thread error\n"); \
468 #ifdef USE_FFT3D_WINTHREADS
469 #define USE_FFT3D_THREADS
470 #ifndef FFT3D_MAX_THREADS
471 #define FFT3D_MAX_THREADS 4
473 #ifndef FFT3D_THREADS_BEGIN_N
474 #define FFT3D_THREADS_BEGIN_N 131072
477 #define fft3d_thread_t HANDLE
478 #define fft3d_thread_create(thp,func,argp) { \
480 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \
482 fprintf(stderr, "fft3d thread error\n"); \
486 #define fft3d_thread_wait(th) { \
487 WaitForSingleObject(th, INFINITE); \
493 void cdft3d(
int n1,
int n2,
int n3,
int isgn,
double ***a,
494 double *t,
int *ip,
double *w)
496 void makewt(
int nw,
int *ip,
double *w);
497 void xdft3da_sub(
int n1,
int n2,
int n3,
int icr,
int isgn,
498 double ***a,
double *t,
int *ip,
double *w);
499 void cdft3db_sub(
int n1,
int n2,
int n3,
int isgn,
double ***a,
500 double *t,
int *ip,
double *w);
501 #ifdef USE_FFT3D_THREADS
502 void xdft3da_subth(
int n1,
int n2,
int n3,
int icr,
int isgn,
503 double ***a,
double *t,
int *ip,
double *w);
504 void cdft3db_subth(
int n1,
int n2,
int n3,
int isgn,
double ***a,
505 double *t,
int *ip,
double *w);
517 if (n > (ip[0] << 2)) {
518 makewt(n >> 2, ip, w);
528 #ifdef USE_FFT3D_THREADS
529 nt *= FFT3D_MAX_THREADS;
536 t = (
double *) malloc(
sizeof(
double) * nt);
537 fft3d_alloc_error_check(t);
539 #ifdef USE_FFT3D_THREADS
540 if ((
double) n1 * n2 * n3 >= (
double) FFT3D_THREADS_BEGIN_N) {
541 xdft3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
542 cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
546 xdft3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
547 cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
555 void rdft3d(
int n1,
int n2,
int n3,
int isgn,
double ***a,
556 double *t,
int *ip,
double *w)
558 void makewt(
int nw,
int *ip,
double *w);
559 void makect(
int nc,
int *ip,
double *c);
560 void xdft3da_sub(
int n1,
int n2,
int n3,
int icr,
int isgn,
561 double ***a,
double *t,
int *ip,
double *w);
562 void cdft3db_sub(
int n1,
int n2,
int n3,
int isgn,
double ***a,
563 double *t,
int *ip,
double *w);
564 void rdft3d_sub(
int n1,
int n2,
int n3,
int isgn,
double ***a);
565 #ifdef USE_FFT3D_THREADS
566 void xdft3da_subth(
int n1,
int n2,
int n3,
int icr,
int isgn,
567 double ***a,
double *t,
int *ip,
double *w);
568 void cdft3db_subth(
int n1,
int n2,
int n3,
int isgn,
double ***a,
569 double *t,
int *ip,
double *w);
571 int n, nw, nc, itnull, nt;
587 if (n3 > (nc << 2)) {
589 makect(nc, ip, w + nw);
599 #ifdef USE_FFT3D_THREADS
600 nt *= FFT3D_MAX_THREADS;
607 t = (
double *) malloc(
sizeof(
double) * nt);
608 fft3d_alloc_error_check(t);
610 #ifdef USE_FFT3D_THREADS
611 if ((
double) n1 * n2 * n3 >= (
double) FFT3D_THREADS_BEGIN_N) {
613 rdft3d_sub(n1, n2, n3, isgn, a);
614 cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
616 xdft3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
618 cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
619 rdft3d_sub(n1, n2, n3, isgn, a);
625 rdft3d_sub(n1, n2, n3, isgn, a);
626 cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
628 xdft3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
630 cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
631 rdft3d_sub(n1, n2, n3, isgn, a);
640 void rdft3dsort(
int n1,
int n2,
int n3,
int isgn,
double ***a)
648 for (i = 0; i < n1; i++) {
649 for (j = n2h + 1; j < n2; j++) {
650 a[i][j][0] = a[i][j][n3 + 1];
651 a[i][j][1] = a[i][j][n3];
654 for (i = n1h + 1; i < n1; i++) {
655 a[i][0][0] = a[i][0][n3 + 1];
656 a[i][0][1] = a[i][0][n3];
657 a[i][n2h][0] = a[i][n2h][n3 + 1];
658 a[i][n2h][1] = a[i][n2h][n3];
660 a[0][0][1] = a[0][0][n3];
661 a[0][n2h][1] = a[0][n2h][n3];
662 a[n1h][0][1] = a[n1h][0][n3];
663 a[n1h][n2h][1] = a[n1h][n2h][n3];
665 for (j = n2h + 1; j < n2; j++) {
670 a[0][n2 - j][n3] = x;
671 a[0][n2 - j][n3 + 1] = -y;
672 a[0][j][0] = a[0][n2 - j][0];
673 a[0][j][1] = -a[0][n2 - j][1];
675 for (i = 1; i < n1; i++) {
676 for (j = n2h + 1; j < n2; j++) {
681 a[n1 - i][n2 - j][n3] = x;
682 a[n1 - i][n2 - j][n3 + 1] = -y;
683 a[i][j][0] = a[n1 - i][n2 - j][0];
684 a[i][j][1] = -a[n1 - i][n2 - j][1];
687 for (i = n1h + 1; i < n1; i++) {
692 a[n1 - i][0][n3] = x;
693 a[n1 - i][0][n3 + 1] = -y;
694 a[i][0][0] = a[n1 - i][0][0];
695 a[i][0][1] = -a[n1 - i][0][1];
699 a[i][n2h][n3 + 1] = y;
700 a[n1 - i][n2h][n3] = x;
701 a[n1 - i][n2h][n3 + 1] = -y;
702 a[i][n2h][0] = a[n1 - i][n2h][0];
703 a[i][n2h][1] = -a[n1 - i][n2h][1];
705 a[0][0][n3] = a[0][0][1];
708 a[0][n2h][n3] = a[0][n2h][1];
709 a[0][n2h][n3 + 1] = 0;
711 a[n1h][0][n3] = a[n1h][0][1];
712 a[n1h][0][n3 + 1] = 0;
714 a[n1h][n2h][n3] = a[n1h][n2h][1];
715 a[n1h][n2h][n3 + 1] = 0;
721 void ddct3d(
int n1,
int n2,
int n3,
int isgn,
double ***a,
722 double *t,
int *ip,
double *w)
724 void makewt(
int nw,
int *ip,
double *w);
725 void makect(
int nc,
int *ip,
double *c);
726 void ddxt3da_sub(
int n1,
int n2,
int n3,
int ics,
int isgn,
727 double ***a,
double *t,
int *ip,
double *w);
728 void ddxt3db_sub(
int n1,
int n2,
int n3,
int ics,
int isgn,
729 double ***a,
double *t,
int *ip,
double *w);
730 #ifdef USE_FFT3D_THREADS
731 void ddxt3da_subth(
int n1,
int n2,
int n3,
int ics,
int isgn,
732 double ***a,
double *t,
int *ip,
double *w);
733 void ddxt3db_subth(
int n1,
int n2,
int n3,
int ics,
int isgn,
734 double ***a,
double *t,
int *ip,
double *w);
736 int n, nw, nc, itnull, nt;
753 makect(nc, ip, w + nw);
763 #ifdef USE_FFT3D_THREADS
764 nt *= FFT3D_MAX_THREADS;
769 t = (
double *) malloc(
sizeof(
double) * nt);
770 fft3d_alloc_error_check(t);
772 #ifdef USE_FFT3D_THREADS
773 if ((
double) n1 * n2 * n3 >= (
double) FFT3D_THREADS_BEGIN_N) {
774 ddxt3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
775 ddxt3db_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
779 ddxt3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
780 ddxt3db_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
788 void ddst3d(
int n1,
int n2,
int n3,
int isgn,
double ***a,
789 double *t,
int *ip,
double *w)
791 void makewt(
int nw,
int *ip,
double *w);
792 void makect(
int nc,
int *ip,
double *c);
793 void ddxt3da_sub(
int n1,
int n2,
int n3,
int ics,
int isgn,
794 double ***a,
double *t,
int *ip,
double *w);
795 void ddxt3db_sub(
int n1,
int n2,
int n3,
int ics,
int isgn,
796 double ***a,
double *t,
int *ip,
double *w);
797 #ifdef USE_FFT3D_THREADS
798 void ddxt3da_subth(
int n1,
int n2,
int n3,
int ics,
int isgn,
799 double ***a,
double *t,
int *ip,
double *w);
800 void ddxt3db_subth(
int n1,
int n2,
int n3,
int ics,
int isgn,
801 double ***a,
double *t,
int *ip,
double *w);
803 int n, nw, nc, itnull, nt;
820 makect(nc, ip, w + nw);
830 #ifdef USE_FFT3D_THREADS
831 nt *= FFT3D_MAX_THREADS;
836 t = (
double *) malloc(
sizeof(
double) * nt);
837 fft3d_alloc_error_check(t);
839 #ifdef USE_FFT3D_THREADS
840 if ((
double) n1 * n2 * n3 >= (
double) FFT3D_THREADS_BEGIN_N) {
841 ddxt3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
842 ddxt3db_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
846 ddxt3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
847 ddxt3db_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
858 void xdft3da_sub(
int n1,
int n2,
int n3,
int icr,
int isgn,
859 double ***a,
double *t,
int *ip,
double *w)
861 void cdft(
int n,
int isgn,
double *a,
int *ip,
double *w);
862 void rdft(
int n,
int isgn,
double *a,
int *ip,
double *w);
865 for (i = 0; i < n1; i++) {
867 for (j = 0; j < n2; j++) {
868 cdft(n3, isgn, a[i][j], ip, w);
870 }
else if (isgn >= 0) {
871 for (j = 0; j < n2; j++) {
872 rdft(n3, isgn, a[i][j], ip, w);
876 for (k = 0; k < n3; k += 8) {
877 for (j = 0; j < n2; j++) {
878 t[2 * j] = a[i][j][k];
879 t[2 * j + 1] = a[i][j][k + 1];
880 t[2 * n2 + 2 * j] = a[i][j][k + 2];
881 t[2 * n2 + 2 * j + 1] = a[i][j][k + 3];
882 t[4 * n2 + 2 * j] = a[i][j][k + 4];
883 t[4 * n2 + 2 * j + 1] = a[i][j][k + 5];
884 t[6 * n2 + 2 * j] = a[i][j][k + 6];
885 t[6 * n2 + 2 * j + 1] = a[i][j][k + 7];
887 cdft(2 * n2, isgn, t, ip, w);
888 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
889 cdft(2 * n2, isgn, &t[4 * n2], ip, w);
890 cdft(2 * n2, isgn, &t[6 * n2], ip, w);
891 for (j = 0; j < n2; j++) {
892 a[i][j][k] = t[2 * j];
893 a[i][j][k + 1] = t[2 * j + 1];
894 a[i][j][k + 2] = t[2 * n2 + 2 * j];
895 a[i][j][k + 3] = t[2 * n2 + 2 * j + 1];
896 a[i][j][k + 4] = t[4 * n2 + 2 * j];
897 a[i][j][k + 5] = t[4 * n2 + 2 * j + 1];
898 a[i][j][k + 6] = t[6 * n2 + 2 * j];
899 a[i][j][k + 7] = t[6 * n2 + 2 * j + 1];
902 }
else if (n3 == 4) {
903 for (j = 0; j < n2; j++) {
904 t[2 * j] = a[i][j][0];
905 t[2 * j + 1] = a[i][j][1];
906 t[2 * n2 + 2 * j] = a[i][j][2];
907 t[2 * n2 + 2 * j + 1] = a[i][j][3];
909 cdft(2 * n2, isgn, t, ip, w);
910 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
911 for (j = 0; j < n2; j++) {
912 a[i][j][0] = t[2 * j];
913 a[i][j][1] = t[2 * j + 1];
914 a[i][j][2] = t[2 * n2 + 2 * j];
915 a[i][j][3] = t[2 * n2 + 2 * j + 1];
917 }
else if (n3 == 2) {
918 for (j = 0; j < n2; j++) {
919 t[2 * j] = a[i][j][0];
920 t[2 * j + 1] = a[i][j][1];
922 cdft(2 * n2, isgn, t, ip, w);
923 for (j = 0; j < n2; j++) {
924 a[i][j][0] = t[2 * j];
925 a[i][j][1] = t[2 * j + 1];
928 if (icr != 0 && isgn < 0) {
929 for (j = 0; j < n2; j++) {
930 rdft(n3, isgn, a[i][j], ip, w);
937 void cdft3db_sub(
int n1,
int n2,
int n3,
int isgn,
double ***a,
938 double *t,
int *ip,
double *w)
940 void cdft(
int n,
int isgn,
double *a,
int *ip,
double *w);
944 for (j = 0; j < n2; j++) {
945 for (k = 0; k < n3; k += 8) {
946 for (i = 0; i < n1; i++) {
947 t[2 * i] = a[i][j][k];
948 t[2 * i + 1] = a[i][j][k + 1];
949 t[2 * n1 + 2 * i] = a[i][j][k + 2];
950 t[2 * n1 + 2 * i + 1] = a[i][j][k + 3];
951 t[4 * n1 + 2 * i] = a[i][j][k + 4];
952 t[4 * n1 + 2 * i + 1] = a[i][j][k + 5];
953 t[6 * n1 + 2 * i] = a[i][j][k + 6];
954 t[6 * n1 + 2 * i + 1] = a[i][j][k + 7];
956 cdft(2 * n1, isgn, t, ip, w);
957 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
958 cdft(2 * n1, isgn, &t[4 * n1], ip, w);
959 cdft(2 * n1, isgn, &t[6 * n1], ip, w);
960 for (i = 0; i < n1; i++) {
961 a[i][j][k] = t[2 * i];
962 a[i][j][k + 1] = t[2 * i + 1];
963 a[i][j][k + 2] = t[2 * n1 + 2 * i];
964 a[i][j][k + 3] = t[2 * n1 + 2 * i + 1];
965 a[i][j][k + 4] = t[4 * n1 + 2 * i];
966 a[i][j][k + 5] = t[4 * n1 + 2 * i + 1];
967 a[i][j][k + 6] = t[6 * n1 + 2 * i];
968 a[i][j][k + 7] = t[6 * n1 + 2 * i + 1];
972 }
else if (n3 == 4) {
973 for (j = 0; j < n2; j++) {
974 for (i = 0; i < n1; i++) {
975 t[2 * i] = a[i][j][0];
976 t[2 * i + 1] = a[i][j][1];
977 t[2 * n1 + 2 * i] = a[i][j][2];
978 t[2 * n1 + 2 * i + 1] = a[i][j][3];
980 cdft(2 * n1, isgn, t, ip, w);
981 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
982 for (i = 0; i < n1; i++) {
983 a[i][j][0] = t[2 * i];
984 a[i][j][1] = t[2 * i + 1];
985 a[i][j][2] = t[2 * n1 + 2 * i];
986 a[i][j][3] = t[2 * n1 + 2 * i + 1];
989 }
else if (n3 == 2) {
990 for (j = 0; j < n2; j++) {
991 for (i = 0; i < n1; i++) {
992 t[2 * i] = a[i][j][0];
993 t[2 * i + 1] = a[i][j][1];
995 cdft(2 * n1, isgn, t, ip, w);
996 for (i = 0; i < n1; i++) {
997 a[i][j][0] = t[2 * i];
998 a[i][j][1] = t[2 * i + 1];
1005 void rdft3d_sub(
int n1,
int n2,
int n3,
int isgn,
double ***a)
1007 int n1h, n2h, i, j, k, l;
1013 for (i = 1; i < n1h; i++) {
1015 xi = a[i][0][0] - a[j][0][0];
1016 a[i][0][0] += a[j][0][0];
1018 xi = a[j][0][1] - a[i][0][1];
1019 a[i][0][1] += a[j][0][1];
1021 xi = a[i][n2h][0] - a[j][n2h][0];
1022 a[i][n2h][0] += a[j][n2h][0];
1024 xi = a[j][n2h][1] - a[i][n2h][1];
1025 a[i][n2h][1] += a[j][n2h][1];
1027 for (k = 1; k < n2h; k++) {
1029 xi = a[i][k][0] - a[j][l][0];
1030 a[i][k][0] += a[j][l][0];
1032 xi = a[j][l][1] - a[i][k][1];
1033 a[i][k][1] += a[j][l][1];
1035 xi = a[j][k][0] - a[i][l][0];
1036 a[j][k][0] += a[i][l][0];
1038 xi = a[i][l][1] - a[j][k][1];
1039 a[j][k][1] += a[i][l][1];
1043 for (k = 1; k < n2h; k++) {
1045 xi = a[0][k][0] - a[0][l][0];
1046 a[0][k][0] += a[0][l][0];
1048 xi = a[0][l][1] - a[0][k][1];
1049 a[0][k][1] += a[0][l][1];
1051 xi = a[n1h][k][0] - a[n1h][l][0];
1052 a[n1h][k][0] += a[n1h][l][0];
1054 xi = a[n1h][l][1] - a[n1h][k][1];
1055 a[n1h][k][1] += a[n1h][l][1];
1059 for (i = 1; i < n1h; i++) {
1061 a[j][0][0] = 0.5 * (a[i][0][0] - a[j][0][0]);
1062 a[i][0][0] -= a[j][0][0];
1063 a[j][0][1] = 0.5 * (a[i][0][1] + a[j][0][1]);
1064 a[i][0][1] -= a[j][0][1];
1065 a[j][n2h][0] = 0.5 * (a[i][n2h][0] - a[j][n2h][0]);
1066 a[i][n2h][0] -= a[j][n2h][0];
1067 a[j][n2h][1] = 0.5 * (a[i][n2h][1] + a[j][n2h][1]);
1068 a[i][n2h][1] -= a[j][n2h][1];
1069 for (k = 1; k < n2h; k++) {
1071 a[j][l][0] = 0.5 * (a[i][k][0] - a[j][l][0]);
1072 a[i][k][0] -= a[j][l][0];
1073 a[j][l][1] = 0.5 * (a[i][k][1] + a[j][l][1]);
1074 a[i][k][1] -= a[j][l][1];
1075 a[i][l][0] = 0.5 * (a[j][k][0] - a[i][l][0]);
1076 a[j][k][0] -= a[i][l][0];
1077 a[i][l][1] = 0.5 * (a[j][k][1] + a[i][l][1]);
1078 a[j][k][1] -= a[i][l][1];
1081 for (k = 1; k < n2h; k++) {
1083 a[0][l][0] = 0.5 * (a[0][k][0] - a[0][l][0]);
1084 a[0][k][0] -= a[0][l][0];
1085 a[0][l][1] = 0.5 * (a[0][k][1] + a[0][l][1]);
1086 a[0][k][1] -= a[0][l][1];
1087 a[n1h][l][0] = 0.5 * (a[n1h][k][0] - a[n1h][l][0]);
1088 a[n1h][k][0] -= a[n1h][l][0];
1089 a[n1h][l][1] = 0.5 * (a[n1h][k][1] + a[n1h][l][1]);
1090 a[n1h][k][1] -= a[n1h][l][1];
1096 void ddxt3da_sub(
int n1,
int n2,
int n3,
int ics,
int isgn,
1097 double ***a,
double *t,
int *ip,
double *w)
1099 void ddct(
int n,
int isgn,
double *a,
int *ip,
double *w);
1100 void ddst(
int n,
int isgn,
double *a,
int *ip,
double *w);
1103 for (i = 0; i < n1; i++) {
1105 for (j = 0; j < n2; j++) {
1106 ddct(n3, isgn, a[i][j], ip, w);
1109 for (j = 0; j < n2; j++) {
1110 ddst(n3, isgn, a[i][j], ip, w);
1114 for (k = 0; k < n3; k += 4) {
1115 for (j = 0; j < n2; j++) {
1117 t[n2 + j] = a[i][j][k + 1];
1118 t[2 * n2 + j] = a[i][j][k + 2];
1119 t[3 * n2 + j] = a[i][j][k + 3];
1122 ddct(n2, isgn, t, ip, w);
1123 ddct(n2, isgn, &t[n2], ip, w);
1124 ddct(n2, isgn, &t[2 * n2], ip, w);
1125 ddct(n2, isgn, &t[3 * n2], ip, w);
1127 ddst(n2, isgn, t, ip, w);
1128 ddst(n2, isgn, &t[n2], ip, w);
1129 ddst(n2, isgn, &t[2 * n2], ip, w);
1130 ddst(n2, isgn, &t[3 * n2], ip, w);
1132 for (j = 0; j < n2; j++) {
1134 a[i][j][k + 1] = t[n2 + j];
1135 a[i][j][k + 2] = t[2 * n2 + j];
1136 a[i][j][k + 3] = t[3 * n2 + j];
1139 }
else if (n3 == 2) {
1140 for (j = 0; j < n2; j++) {
1142 t[n2 + j] = a[i][j][1];
1145 ddct(n2, isgn, t, ip, w);
1146 ddct(n2, isgn, &t[n2], ip, w);
1148 ddst(n2, isgn, t, ip, w);
1149 ddst(n2, isgn, &t[n2], ip, w);
1151 for (j = 0; j < n2; j++) {
1153 a[i][j][1] = t[n2 + j];
1160 void ddxt3db_sub(
int n1,
int n2,
int n3,
int ics,
int isgn,
1161 double ***a,
double *t,
int *ip,
double *w)
1163 void ddct(
int n,
int isgn,
double *a,
int *ip,
double *w);
1164 void ddst(
int n,
int isgn,
double *a,
int *ip,
double *w);
1168 for (j = 0; j < n2; j++) {
1169 for (k = 0; k < n3; k += 4) {
1170 for (i = 0; i < n1; i++) {
1172 t[n1 + i] = a[i][j][k + 1];
1173 t[2 * n1 + i] = a[i][j][k + 2];
1174 t[3 * n1 + i] = a[i][j][k + 3];
1177 ddct(n1, isgn, t, ip, w);
1178 ddct(n1, isgn, &t[n1], ip, w);
1179 ddct(n1, isgn, &t[2 * n1], ip, w);
1180 ddct(n1, isgn, &t[3 * n1], ip, w);
1182 ddst(n1, isgn, t, ip, w);
1183 ddst(n1, isgn, &t[n1], ip, w);
1184 ddst(n1, isgn, &t[2 * n1], ip, w);
1185 ddst(n1, isgn, &t[3 * n1], ip, w);
1187 for (i = 0; i < n1; i++) {
1189 a[i][j][k + 1] = t[n1 + i];
1190 a[i][j][k + 2] = t[2 * n1 + i];
1191 a[i][j][k + 3] = t[3 * n1 + i];
1195 }
else if (n3 == 2) {
1196 for (j = 0; j < n2; j++) {
1197 for (i = 0; i < n1; i++) {
1199 t[n1 + i] = a[i][j][1];
1202 ddct(n1, isgn, t, ip, w);
1203 ddct(n1, isgn, &t[n1], ip, w);
1205 ddst(n1, isgn, t, ip, w);
1206 ddst(n1, isgn, &t[n1], ip, w);
1208 for (i = 0; i < n1; i++) {
1210 a[i][j][1] = t[n1 + i];
1217 #ifdef USE_FFT3D_THREADS
1218 struct fft3d_arg_st {
1231 typedef struct fft3d_arg_st fft3d_arg_t;
1234 void xdft3da_subth(
int n1,
int n2,
int n3,
int icr,
int isgn,
1235 double ***a,
double *t,
int *ip,
double *w)
1237 void *xdft3da_th(
void *p);
1238 fft3d_thread_t th[FFT3D_MAX_THREADS];
1239 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1242 nthread = FFT3D_MAX_THREADS;
1249 }
else if (n3 < 4) {
1252 for (i = 0; i < nthread; i++) {
1253 ag[i].nthread = nthread;
1261 ag[i].t = &t[nt * i];
1264 fft3d_thread_create(&th[i], xdft3da_th, &ag[i]);
1266 for (i = 0; i < nthread; i++) {
1267 fft3d_thread_wait(th[i]);
1272 void cdft3db_subth(
int n1,
int n2,
int n3,
int isgn,
double ***a,
1273 double *t,
int *ip,
double *w)
1275 void *cdft3db_th(
void *p);
1276 fft3d_thread_t th[FFT3D_MAX_THREADS];
1277 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1280 nthread = FFT3D_MAX_THREADS;
1287 }
else if (n3 < 4) {
1290 for (i = 0; i < nthread; i++) {
1291 ag[i].nthread = nthread;
1298 ag[i].t = &t[nt * i];
1301 fft3d_thread_create(&th[i], cdft3db_th, &ag[i]);
1303 for (i = 0; i < nthread; i++) {
1304 fft3d_thread_wait(th[i]);
1309 void ddxt3da_subth(
int n1,
int n2,
int n3,
int ics,
int isgn,
1310 double ***a,
double *t,
int *ip,
double *w)
1312 void *ddxt3da_th(
void *p);
1313 fft3d_thread_t th[FFT3D_MAX_THREADS];
1314 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1317 nthread = FFT3D_MAX_THREADS;
1325 for (i = 0; i < nthread; i++) {
1326 ag[i].nthread = nthread;
1334 ag[i].t = &t[nt * i];
1337 fft3d_thread_create(&th[i], ddxt3da_th, &ag[i]);
1339 for (i = 0; i < nthread; i++) {
1340 fft3d_thread_wait(th[i]);
1345 void ddxt3db_subth(
int n1,
int n2,
int n3,
int ics,
int isgn,
1346 double ***a,
double *t,
int *ip,
double *w)
1348 void *ddxt3db_th(
void *p);
1349 fft3d_thread_t th[FFT3D_MAX_THREADS];
1350 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1353 nthread = FFT3D_MAX_THREADS;
1361 for (i = 0; i < nthread; i++) {
1362 ag[i].nthread = nthread;
1370 ag[i].t = &t[nt * i];
1373 fft3d_thread_create(&th[i], ddxt3db_th, &ag[i]);
1375 for (i = 0; i < nthread; i++) {
1376 fft3d_thread_wait(th[i]);
1381 void *xdft3da_th(
void *p)
1383 void cdft(
int n,
int isgn,
double *a,
int *ip,
double *w);
1384 void rdft(
int n,
int isgn,
double *a,
int *ip,
double *w);
1385 int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k;
1386 double ***a, *t, *w;
1388 nthread = ((fft3d_arg_t *) p)->nthread;
1389 n0 = ((fft3d_arg_t *) p)->n0;
1390 n1 = ((fft3d_arg_t *) p)->n1;
1391 n2 = ((fft3d_arg_t *) p)->n2;
1392 n3 = ((fft3d_arg_t *) p)->n3;
1393 icr = ((fft3d_arg_t *) p)->ic;
1394 isgn = ((fft3d_arg_t *) p)->isgn;
1395 a = ((fft3d_arg_t *) p)->a;
1396 t = ((fft3d_arg_t *) p)->t;
1397 ip = ((fft3d_arg_t *) p)->ip;
1398 w = ((fft3d_arg_t *) p)->w;
1399 for (i = n0; i < n1; i += nthread) {
1401 for (j = 0; j < n2; j++) {
1402 cdft(n3, isgn, a[i][j], ip, w);
1404 }
else if (isgn >= 0) {
1405 for (j = 0; j < n2; j++) {
1406 rdft(n3, isgn, a[i][j], ip, w);
1410 for (k = 0; k < n3; k += 8) {
1411 for (j = 0; j < n2; j++) {
1412 t[2 * j] = a[i][j][k];
1413 t[2 * j + 1] = a[i][j][k + 1];
1414 t[2 * n2 + 2 * j] = a[i][j][k + 2];
1415 t[2 * n2 + 2 * j + 1] = a[i][j][k + 3];
1416 t[4 * n2 + 2 * j] = a[i][j][k + 4];
1417 t[4 * n2 + 2 * j + 1] = a[i][j][k + 5];
1418 t[6 * n2 + 2 * j] = a[i][j][k + 6];
1419 t[6 * n2 + 2 * j + 1] = a[i][j][k + 7];
1421 cdft(2 * n2, isgn, t, ip, w);
1422 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
1423 cdft(2 * n2, isgn, &t[4 * n2], ip, w);
1424 cdft(2 * n2, isgn, &t[6 * n2], ip, w);
1425 for (j = 0; j < n2; j++) {
1426 a[i][j][k] = t[2 * j];
1427 a[i][j][k + 1] = t[2 * j + 1];
1428 a[i][j][k + 2] = t[2 * n2 + 2 * j];
1429 a[i][j][k + 3] = t[2 * n2 + 2 * j + 1];
1430 a[i][j][k + 4] = t[4 * n2 + 2 * j];
1431 a[i][j][k + 5] = t[4 * n2 + 2 * j + 1];
1432 a[i][j][k + 6] = t[6 * n2 + 2 * j];
1433 a[i][j][k + 7] = t[6 * n2 + 2 * j + 1];
1436 }
else if (n3 == 4) {
1437 for (j = 0; j < n2; j++) {
1438 t[2 * j] = a[i][j][0];
1439 t[2 * j + 1] = a[i][j][1];
1440 t[2 * n2 + 2 * j] = a[i][j][2];
1441 t[2 * n2 + 2 * j + 1] = a[i][j][3];
1443 cdft(2 * n2, isgn, t, ip, w);
1444 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
1445 for (j = 0; j < n2; j++) {
1446 a[i][j][0] = t[2 * j];
1447 a[i][j][1] = t[2 * j + 1];
1448 a[i][j][2] = t[2 * n2 + 2 * j];
1449 a[i][j][3] = t[2 * n2 + 2 * j + 1];
1451 }
else if (n3 == 2) {
1452 for (j = 0; j < n2; j++) {
1453 t[2 * j] = a[i][j][0];
1454 t[2 * j + 1] = a[i][j][1];
1456 cdft(2 * n2, isgn, t, ip, w);
1457 for (j = 0; j < n2; j++) {
1458 a[i][j][0] = t[2 * j];
1459 a[i][j][1] = t[2 * j + 1];
1462 if (icr != 0 && isgn < 0) {
1463 for (j = 0; j < n2; j++) {
1464 rdft(n3, isgn, a[i][j], ip, w);
1472 void *cdft3db_th(
void *p)
1474 void cdft(
int n,
int isgn,
double *a,
int *ip,
double *w);
1475 int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k;
1476 double ***a, *t, *w;
1478 nthread = ((fft3d_arg_t *) p)->nthread;
1479 n0 = ((fft3d_arg_t *) p)->n0;
1480 n1 = ((fft3d_arg_t *) p)->n1;
1481 n2 = ((fft3d_arg_t *) p)->n2;
1482 n3 = ((fft3d_arg_t *) p)->n3;
1483 isgn = ((fft3d_arg_t *) p)->isgn;
1484 a = ((fft3d_arg_t *) p)->a;
1485 t = ((fft3d_arg_t *) p)->t;
1486 ip = ((fft3d_arg_t *) p)->ip;
1487 w = ((fft3d_arg_t *) p)->w;
1489 for (j = n0; j < n2; j += nthread) {
1490 for (k = 0; k < n3; k += 8) {
1491 for (i = 0; i < n1; i++) {
1492 t[2 * i] = a[i][j][k];
1493 t[2 * i + 1] = a[i][j][k + 1];
1494 t[2 * n1 + 2 * i] = a[i][j][k + 2];
1495 t[2 * n1 + 2 * i + 1] = a[i][j][k + 3];
1496 t[4 * n1 + 2 * i] = a[i][j][k + 4];
1497 t[4 * n1 + 2 * i + 1] = a[i][j][k + 5];
1498 t[6 * n1 + 2 * i] = a[i][j][k + 6];
1499 t[6 * n1 + 2 * i + 1] = a[i][j][k + 7];
1501 cdft(2 * n1, isgn, t, ip, w);
1502 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1503 cdft(2 * n1, isgn, &t[4 * n1], ip, w);
1504 cdft(2 * n1, isgn, &t[6 * n1], ip, w);
1505 for (i = 0; i < n1; i++) {
1506 a[i][j][k] = t[2 * i];
1507 a[i][j][k + 1] = t[2 * i + 1];
1508 a[i][j][k + 2] = t[2 * n1 + 2 * i];
1509 a[i][j][k + 3] = t[2 * n1 + 2 * i + 1];
1510 a[i][j][k + 4] = t[4 * n1 + 2 * i];
1511 a[i][j][k + 5] = t[4 * n1 + 2 * i + 1];
1512 a[i][j][k + 6] = t[6 * n1 + 2 * i];
1513 a[i][j][k + 7] = t[6 * n1 + 2 * i + 1];
1517 }
else if (n3 == 4) {
1518 for (j = n0; j < n2; j += nthread) {
1519 for (i = 0; i < n1; i++) {
1520 t[2 * i] = a[i][j][0];
1521 t[2 * i + 1] = a[i][j][1];
1522 t[2 * n1 + 2 * i] = a[i][j][2];
1523 t[2 * n1 + 2 * i + 1] = a[i][j][3];
1525 cdft(2 * n1, isgn, t, ip, w);
1526 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1527 for (i = 0; i < n1; i++) {
1528 a[i][j][0] = t[2 * i];
1529 a[i][j][1] = t[2 * i + 1];
1530 a[i][j][2] = t[2 * n1 + 2 * i];
1531 a[i][j][3] = t[2 * n1 + 2 * i + 1];
1534 }
else if (n3 == 2) {
1535 for (j = n0; j < n2; j += nthread) {
1536 for (i = 0; i < n1; i++) {
1537 t[2 * i] = a[i][j][0];
1538 t[2 * i + 1] = a[i][j][1];
1540 cdft(2 * n1, isgn, t, ip, w);
1541 for (i = 0; i < n1; i++) {
1542 a[i][j][0] = t[2 * i];
1543 a[i][j][1] = t[2 * i + 1];
1551 void *ddxt3da_th(
void *p)
1553 void ddct(
int n,
int isgn,
double *a,
int *ip,
double *w);
1554 void ddst(
int n,
int isgn,
double *a,
int *ip,
double *w);
1555 int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k;
1556 double ***a, *t, *w;
1558 nthread = ((fft3d_arg_t *) p)->nthread;
1559 n0 = ((fft3d_arg_t *) p)->n0;
1560 n1 = ((fft3d_arg_t *) p)->n1;
1561 n2 = ((fft3d_arg_t *) p)->n2;
1562 n3 = ((fft3d_arg_t *) p)->n3;
1563 ics = ((fft3d_arg_t *) p)->ic;
1564 isgn = ((fft3d_arg_t *) p)->isgn;
1565 a = ((fft3d_arg_t *) p)->a;
1566 t = ((fft3d_arg_t *) p)->t;
1567 ip = ((fft3d_arg_t *) p)->ip;
1568 w = ((fft3d_arg_t *) p)->w;
1569 for (i = n0; i < n1; i += nthread) {
1571 for (j = 0; j < n2; j++) {
1572 ddct(n3, isgn, a[i][j], ip, w);
1575 for (j = 0; j < n2; j++) {
1576 ddst(n3, isgn, a[i][j], ip, w);
1580 for (k = 0; k < n3; k += 4) {
1581 for (j = 0; j < n2; j++) {
1583 t[n2 + j] = a[i][j][k + 1];
1584 t[2 * n2 + j] = a[i][j][k + 2];
1585 t[3 * n2 + j] = a[i][j][k + 3];
1588 ddct(n2, isgn, t, ip, w);
1589 ddct(n2, isgn, &t[n2], ip, w);
1590 ddct(n2, isgn, &t[2 * n2], ip, w);
1591 ddct(n2, isgn, &t[3 * n2], ip, w);
1593 ddst(n2, isgn, t, ip, w);
1594 ddst(n2, isgn, &t[n2], ip, w);
1595 ddst(n2, isgn, &t[2 * n2], ip, w);
1596 ddst(n2, isgn, &t[3 * n2], ip, w);
1598 for (j = 0; j < n2; j++) {
1600 a[i][j][k + 1] = t[n2 + j];
1601 a[i][j][k + 2] = t[2 * n2 + j];
1602 a[i][j][k + 3] = t[3 * n2 + j];
1605 }
else if (n3 == 2) {
1606 for (j = 0; j < n2; j++) {
1608 t[n2 + j] = a[i][j][1];
1611 ddct(n2, isgn, t, ip, w);
1612 ddct(n2, isgn, &t[n2], ip, w);
1614 ddst(n2, isgn, t, ip, w);
1615 ddst(n2, isgn, &t[n2], ip, w);
1617 for (j = 0; j < n2; j++) {
1619 a[i][j][1] = t[n2 + j];
1627 void *ddxt3db_th(
void *p)
1629 void ddct(
int n,
int isgn,
double *a,
int *ip,
double *w);
1630 void ddst(
int n,
int isgn,
double *a,
int *ip,
double *w);
1631 int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k;
1632 double ***a, *t, *w;
1634 nthread = ((fft3d_arg_t *) p)->nthread;
1635 n0 = ((fft3d_arg_t *) p)->n0;
1636 n1 = ((fft3d_arg_t *) p)->n1;
1637 n2 = ((fft3d_arg_t *) p)->n2;
1638 n3 = ((fft3d_arg_t *) p)->n3;
1639 ics = ((fft3d_arg_t *) p)->ic;
1640 isgn = ((fft3d_arg_t *) p)->isgn;
1641 a = ((fft3d_arg_t *) p)->a;
1642 t = ((fft3d_arg_t *) p)->t;
1643 ip = ((fft3d_arg_t *) p)->ip;
1644 w = ((fft3d_arg_t *) p)->w;
1646 for (j = n0; j < n2; j += nthread) {
1647 for (k = 0; k < n3; k += 4) {
1648 for (i = 0; i < n1; i++) {
1650 t[n1 + i] = a[i][j][k + 1];
1651 t[2 * n1 + i] = a[i][j][k + 2];
1652 t[3 * n1 + i] = a[i][j][k + 3];
1655 ddct(n1, isgn, t, ip, w);
1656 ddct(n1, isgn, &t[n1], ip, w);
1657 ddct(n1, isgn, &t[2 * n1], ip, w);
1658 ddct(n1, isgn, &t[3 * n1], ip, w);
1660 ddst(n1, isgn, t, ip, w);
1661 ddst(n1, isgn, &t[n1], ip, w);
1662 ddst(n1, isgn, &t[2 * n1], ip, w);
1663 ddst(n1, isgn, &t[3 * n1], ip, w);
1665 for (i = 0; i < n1; i++) {
1667 a[i][j][k + 1] = t[n1 + i];
1668 a[i][j][k + 2] = t[2 * n1 + i];
1669 a[i][j][k + 3] = t[3 * n1 + i];
1673 }
else if (n3 == 2) {
1674 for (j = n0; j < n2; j += nthread) {
1675 for (i = 0; i < n1; i++) {
1677 t[n1 + i] = a[i][j][1];
1680 ddct(n1, isgn, t, ip, w);
1681 ddct(n1, isgn, &t[n1], ip, w);
1683 ddst(n1, isgn, t, ip, w);
1684 ddst(n1, isgn, &t[n1], ip, w);
1686 for (i = 0; i < n1; i++) {
1688 a[i][j][1] = t[n1 + i];