comparison peck_fft.c @ 10:05f6ab0a17c0

backup
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Wed, 21 Sep 2011 15:20:58 +0200
parents 8726585681f6
children abdcde012978
comparison
equal deleted inserted replaced
9:8726585681f6 10:05f6ab0a17c0
17 /* The guts header contains all the multiplication and addition macros that are defined for 17 /* The guts header contains all the multiplication and addition macros that are defined for
18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions.
19 */ 19 */
20 #if !BFLY2_ASM 20 #if !BFLY2_ASM
21 static void kf_bfly2( 21 static void kf_bfly2(
22 peck_fft_cpx *Fout, 22 peck_fft_cpx * __restrict Fout,
23 const size_t fstride, 23 const size_t fstride,
24 const peck_fft_cfg st, 24 const peck_fft_cfg st,
25 int m) { 25 int m) {
26 26
27 // printf("kf_bfly2, %d\n", fstride); 27 // printf("kf_bfly2, %d\n", fstride);
38 ++Fout; 38 ++Fout;
39 } while (--m); 39 } while (--m);
40 } 40 }
41 #endif 41 #endif
42 42
43 #if !BFLY4_ASM
43 static void kf_bfly4( 44 static void kf_bfly4(
44 peck_fft_cpx * Fout, 45 peck_fft_cpx * __restrict Fout,
45 const size_t fstride, 46 const size_t fstride,
46 const peck_fft_cfg st, 47 const peck_fft_cfg st,
47 const size_t m) { 48 const size_t m) {
48 49
49 peck_fft_cpx *tw1,*tw2,*tw3; 50 peck_fft_cpx scratch[4];
50 peck_fft_cpx scratch[6]; 51 peck_fft_cpx * __restrict tw1, * __restrict tw2, * __restrict tw3;
51 size_t k=m; 52 size_t k = m;
52 const size_t m2=2*m; 53 const size_t m2 = 2*m;
53 const size_t m3=3*m; 54 const size_t m3 = 3*m;
54 55
55 // printf("kf_bfly4, %d\n", fstride); 56 // printf("kf_bfly4, %d\n", fstride);
56 57
57 tw3 = tw2 = tw1 = st->twiddles; 58 tw3 = tw2 = tw1 = st->twiddles;
58 59
59 do { 60 if (st->inverse) {
60 C_MUL(scratch[0], Fout[m], *tw1); 61 do {
61 C_MUL(scratch[1], Fout[m2], *tw2); 62 C_MUL(scratch[0], Fout[m], *tw1);
62 C_MUL(scratch[2], Fout[m3], *tw3); 63 C_MUL(scratch[3], Fout[m2], *tw2);
63 64 C_MUL(scratch[2], Fout[m3], *tw3);
64 C_SUB(scratch[5], *Fout, scratch[1]); 65
65 C_ADDTO(*Fout, scratch[1]); 66 C_SUB(scratch[1], *Fout, scratch[3]);
66 C_ADD(scratch[3], scratch[0], scratch[2]); 67 C_ADDTO(*Fout, scratch[3]);
67 C_SUB(scratch[4], scratch[0], scratch[2]); 68
68 C_SUB(Fout[m2], *Fout, scratch[3]); 69 C_ADD(scratch[3], scratch[0], scratch[2]);
69 tw1 += fstride; 70 C_SUB(Fout[m2], *Fout, scratch[3]);
70 tw2 += fstride*2; 71 C_ADDTO(*Fout, scratch[3]);
71 tw3 += fstride*3; 72
72 C_ADDTO(*Fout, scratch[3]); 73 tw1 += fstride;
73 74 tw2 += fstride*2;
74 if (st->inverse) { 75 tw3 += fstride*3;
75 Fout[m].r = scratch[5].r - scratch[4].i; 76
76 Fout[m].i = scratch[5].i + scratch[4].r; 77 C_SUB(scratch[3], scratch[0], scratch[2]);
77 Fout[m3].r = scratch[5].r + scratch[4].i; 78 Fout[m].r = scratch[1].r - scratch[3].i;
78 Fout[m3].i = scratch[5].i - scratch[4].r; 79 Fout[m].i = scratch[1].i + scratch[3].r;
79 } else { 80 Fout[m3].r = scratch[1].r + scratch[3].i;
80 Fout[m].r = scratch[5].r + scratch[4].i; 81 Fout[m3].i = scratch[1].i - scratch[3].r;
81 Fout[m].i = scratch[5].i - scratch[4].r; 82
82 Fout[m3].r = scratch[5].r - scratch[4].i; 83 ++Fout;
83 Fout[m3].i = scratch[5].i + scratch[4].r; 84 } while (--k);
84 } 85 }
85 ++Fout; 86 else {
86 } while (--k); 87 do {
87 } 88 C_MUL(scratch[0], Fout[m], *tw1);
89 C_MUL(scratch[3], Fout[m2], *tw2);
90 C_MUL(scratch[2], Fout[m3], *tw3);
91
92 C_SUB(scratch[1], *Fout, scratch[3]);
93 C_ADDTO(*Fout, scratch[3]);
94
95 C_ADD(scratch[3], scratch[0], scratch[2]);
96 C_SUB(Fout[m2], *Fout, scratch[3]);
97 C_ADDTO(*Fout, scratch[3]);
98
99 tw1 += fstride;
100 tw2 += fstride*2;
101 tw3 += fstride*3;
102
103 C_SUB(scratch[3], scratch[0], scratch[2]);
104 Fout[m].r = scratch[1].r + scratch[3].i;
105 Fout[m].i = scratch[1].i - scratch[3].r;
106 Fout[m3].r = scratch[1].r - scratch[3].i;
107 Fout[m3].i = scratch[1].i + scratch[3].r;
108
109 ++Fout;
110 } while (--k);
111 }
112 }
113 #endif
88 114
89 static void kf_bfly3( 115 static void kf_bfly3(
90 peck_fft_cpx * Fout, 116 peck_fft_cpx * Fout,
91 const size_t fstride, 117 const size_t fstride,
92 const peck_fft_cfg st, 118 const peck_fft_cfg st,

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.