Mercurial > hg > peckfft
comparison peck_fft.c @ 10:05f6ab0a17c0
backup
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Wed, 21 Sep 2011 15:20:58 +0200 |
parents | 8726585681f6 |
children | abdcde012978 |
comparison
equal
deleted
inserted
replaced
9:8726585681f6 | 10:05f6ab0a17c0 |
---|---|
17 /* The guts header contains all the multiplication and addition macros that are defined for | 17 /* The guts header contains all the multiplication and addition macros that are defined for |
18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. | 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. |
19 */ | 19 */ |
20 #if !BFLY2_ASM | 20 #if !BFLY2_ASM |
21 static void kf_bfly2( | 21 static void kf_bfly2( |
22 peck_fft_cpx *Fout, | 22 peck_fft_cpx * __restrict Fout, |
23 const size_t fstride, | 23 const size_t fstride, |
24 const peck_fft_cfg st, | 24 const peck_fft_cfg st, |
25 int m) { | 25 int m) { |
26 | 26 |
27 // printf("kf_bfly2, %d\n", fstride); | 27 // printf("kf_bfly2, %d\n", fstride); |
38 ++Fout; | 38 ++Fout; |
39 } while (--m); | 39 } while (--m); |
40 } | 40 } |
41 #endif | 41 #endif |
42 | 42 |
43 #if !BFLY4_ASM | |
43 static void kf_bfly4( | 44 static void kf_bfly4( |
44 peck_fft_cpx * Fout, | 45 peck_fft_cpx * __restrict Fout, |
45 const size_t fstride, | 46 const size_t fstride, |
46 const peck_fft_cfg st, | 47 const peck_fft_cfg st, |
47 const size_t m) { | 48 const size_t m) { |
48 | 49 |
49 peck_fft_cpx *tw1,*tw2,*tw3; | 50 peck_fft_cpx scratch[4]; |
50 peck_fft_cpx scratch[6]; | 51 peck_fft_cpx * __restrict tw1, * __restrict tw2, * __restrict tw3; |
51 size_t k=m; | 52 size_t k = m; |
52 const size_t m2=2*m; | 53 const size_t m2 = 2*m; |
53 const size_t m3=3*m; | 54 const size_t m3 = 3*m; |
54 | 55 |
55 // printf("kf_bfly4, %d\n", fstride); | 56 // printf("kf_bfly4, %d\n", fstride); |
56 | 57 |
57 tw3 = tw2 = tw1 = st->twiddles; | 58 tw3 = tw2 = tw1 = st->twiddles; |
58 | 59 |
59 do { | 60 if (st->inverse) { |
60 C_MUL(scratch[0], Fout[m], *tw1); | 61 do { |
61 C_MUL(scratch[1], Fout[m2], *tw2); | 62 C_MUL(scratch[0], Fout[m], *tw1); |
62 C_MUL(scratch[2], Fout[m3], *tw3); | 63 C_MUL(scratch[3], Fout[m2], *tw2); |
63 | 64 C_MUL(scratch[2], Fout[m3], *tw3); |
64 C_SUB(scratch[5], *Fout, scratch[1]); | 65 |
65 C_ADDTO(*Fout, scratch[1]); | 66 C_SUB(scratch[1], *Fout, scratch[3]); |
66 C_ADD(scratch[3], scratch[0], scratch[2]); | 67 C_ADDTO(*Fout, scratch[3]); |
67 C_SUB(scratch[4], scratch[0], scratch[2]); | 68 |
68 C_SUB(Fout[m2], *Fout, scratch[3]); | 69 C_ADD(scratch[3], scratch[0], scratch[2]); |
69 tw1 += fstride; | 70 C_SUB(Fout[m2], *Fout, scratch[3]); |
70 tw2 += fstride*2; | 71 C_ADDTO(*Fout, scratch[3]); |
71 tw3 += fstride*3; | 72 |
72 C_ADDTO(*Fout, scratch[3]); | 73 tw1 += fstride; |
73 | 74 tw2 += fstride*2; |
74 if (st->inverse) { | 75 tw3 += fstride*3; |
75 Fout[m].r = scratch[5].r - scratch[4].i; | 76 |
76 Fout[m].i = scratch[5].i + scratch[4].r; | 77 C_SUB(scratch[3], scratch[0], scratch[2]); |
77 Fout[m3].r = scratch[5].r + scratch[4].i; | 78 Fout[m].r = scratch[1].r - scratch[3].i; |
78 Fout[m3].i = scratch[5].i - scratch[4].r; | 79 Fout[m].i = scratch[1].i + scratch[3].r; |
79 } else { | 80 Fout[m3].r = scratch[1].r + scratch[3].i; |
80 Fout[m].r = scratch[5].r + scratch[4].i; | 81 Fout[m3].i = scratch[1].i - scratch[3].r; |
81 Fout[m].i = scratch[5].i - scratch[4].r; | 82 |
82 Fout[m3].r = scratch[5].r - scratch[4].i; | 83 ++Fout; |
83 Fout[m3].i = scratch[5].i + scratch[4].r; | 84 } while (--k); |
84 } | 85 } |
85 ++Fout; | 86 else { |
86 } while (--k); | 87 do { |
87 } | 88 C_MUL(scratch[0], Fout[m], *tw1); |
89 C_MUL(scratch[3], Fout[m2], *tw2); | |
90 C_MUL(scratch[2], Fout[m3], *tw3); | |
91 | |
92 C_SUB(scratch[1], *Fout, scratch[3]); | |
93 C_ADDTO(*Fout, scratch[3]); | |
94 | |
95 C_ADD(scratch[3], scratch[0], scratch[2]); | |
96 C_SUB(Fout[m2], *Fout, scratch[3]); | |
97 C_ADDTO(*Fout, scratch[3]); | |
98 | |
99 tw1 += fstride; | |
100 tw2 += fstride*2; | |
101 tw3 += fstride*3; | |
102 | |
103 C_SUB(scratch[3], scratch[0], scratch[2]); | |
104 Fout[m].r = scratch[1].r + scratch[3].i; | |
105 Fout[m].i = scratch[1].i - scratch[3].r; | |
106 Fout[m3].r = scratch[1].r - scratch[3].i; | |
107 Fout[m3].i = scratch[1].i + scratch[3].r; | |
108 | |
109 ++Fout; | |
110 } while (--k); | |
111 } | |
112 } | |
113 #endif | |
88 | 114 |
89 static void kf_bfly3( | 115 static void kf_bfly3( |
90 peck_fft_cpx * Fout, | 116 peck_fft_cpx * Fout, |
91 const size_t fstride, | 117 const size_t fstride, |
92 const peck_fft_cfg st, | 118 const peck_fft_cfg st, |