Mercurial > hg > peckfft
comparison peck_fft.c @ 4:2d6c49fcafcb
neon2 and neon4 support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Fri, 16 Sep 2011 14:04:19 +0200 |
parents | cfec79393811 |
children | c7237a7544eb |
comparison
equal
deleted
inserted
replaced
3:3b31bd44a09f | 4:2d6c49fcafcb |
---|---|
17 /* The guts header contains all the multiplication and addition macros that are defined for | 17 /* The guts header contains all the multiplication and addition macros that are defined for |
18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. | 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. |
19 */ | 19 */ |
20 | 20 |
21 static void kf_bfly2( | 21 static void kf_bfly2( |
22 peck_fft_cpx * Fout, | 22 peck_fft_cpx * Fout, |
23 const size_t fstride, | 23 const size_t fstride, |
24 const peck_fft_cfg st, | 24 const peck_fft_cfg st, |
25 int m) { | 25 int m) { |
26 | 26 |
27 //printf("kf_bfly2\n"); | 27 //printf("kf_bfly2\n"); |
28 | 28 |
29 peck_fft_cpx * Fout2; | 29 peck_fft_cpx * Fout2; |
30 peck_fft_cpx * tw1 = st->twiddles; | 30 peck_fft_cpx * tw1 = st->twiddles; |
42 ++Fout; | 42 ++Fout; |
43 } while (--m); | 43 } while (--m); |
44 } | 44 } |
45 | 45 |
46 static void kf_bfly4( | 46 static void kf_bfly4( |
47 peck_fft_cpx * Fout, | 47 peck_fft_cpx * Fout, |
48 const size_t fstride, | 48 const size_t fstride, |
49 const peck_fft_cfg st, | 49 const peck_fft_cfg st, |
50 const size_t m) { | 50 const size_t m) { |
51 | |
51 peck_fft_cpx *tw1,*tw2,*tw3; | 52 peck_fft_cpx *tw1,*tw2,*tw3; |
52 peck_fft_cpx scratch[6]; | 53 peck_fft_cpx scratch[6]; |
53 size_t k=m; | 54 size_t k=m; |
54 const size_t m2=2*m; | 55 const size_t m2=2*m; |
55 const size_t m3=3*m; | 56 const size_t m3=3*m; |
92 ++Fout; | 93 ++Fout; |
93 } while (--k); | 94 } while (--k); |
94 } | 95 } |
95 | 96 |
96 static void kf_bfly3( | 97 static void kf_bfly3( |
97 peck_fft_cpx * Fout, | 98 peck_fft_cpx * Fout, |
98 const size_t fstride, | 99 const size_t fstride, |
99 const peck_fft_cfg st, | 100 const peck_fft_cfg st, |
100 size_t m) { | 101 size_t m) { |
102 | |
101 size_t k=m; | 103 size_t k=m; |
102 const size_t m2 = 2*m; | 104 const size_t m2 = 2*m; |
103 peck_fft_cpx *tw1, *tw2; | 105 peck_fft_cpx *tw1, *tw2; |
104 peck_fft_cpx scratch[5]; | 106 peck_fft_cpx scratch[5]; |
105 peck_fft_cpx epi3; | 107 peck_fft_cpx epi3; |
106 epi3 = st->twiddles[fstride*m]; | 108 epi3 = st->twiddles[fstride*m]; |
107 | 109 |
108 printf("kf_bfly3\n"); | 110 printf("kf_bfly3\n"); |
109 | 111 |
110 | |
111 tw1=tw2=st->twiddles; | 112 tw1=tw2=st->twiddles; |
112 | 113 |
113 do { | 114 do { |
114 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); | 115 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); |
115 | 116 |
137 ++Fout; | 138 ++Fout; |
138 } while (--k); | 139 } while (--k); |
139 } | 140 } |
140 | 141 |
141 static void kf_bfly5( | 142 static void kf_bfly5( |
142 peck_fft_cpx * Fout, | 143 peck_fft_cpx * Fout, |
143 const size_t fstride, | 144 const size_t fstride, |
144 const peck_fft_cfg st, | 145 const peck_fft_cfg st, |
145 int m | 146 int m |
146 ) | 147 ) { |
147 { | |
148 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 148 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
149 int u; | 149 int u; |
150 peck_fft_cpx scratch[13]; | 150 peck_fft_cpx scratch[13]; |
151 peck_fft_cpx * twiddles = st->twiddles; | 151 peck_fft_cpx * twiddles = st->twiddles; |
152 peck_fft_cpx *tw; | 152 peck_fft_cpx *tw; |
154 ya = twiddles[fstride*m]; | 154 ya = twiddles[fstride*m]; |
155 yb = twiddles[fstride*2*m]; | 155 yb = twiddles[fstride*2*m]; |
156 | 156 |
157 printf("kf_bfly5\n"); | 157 printf("kf_bfly5\n"); |
158 | 158 |
159 | |
160 Fout0=Fout; | 159 Fout0=Fout; |
161 Fout1=Fout0+m; | 160 Fout1=Fout0+m; |
162 Fout2=Fout0+2*m; | 161 Fout2=Fout0+2*m; |
163 Fout3=Fout0+3*m; | 162 Fout3=Fout0+3*m; |
164 Fout4=Fout0+4*m; | 163 Fout4=Fout0+4*m; |
165 | 164 |
166 tw=st->twiddles; | 165 tw=st->twiddles; |
167 for ( u=0; u<m; ++u ) { | 166 for (u = 0; u < m; ++u) { |
168 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); | 167 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); |
169 scratch[0] = *Fout0; | 168 scratch[0] = *Fout0; |
170 | 169 |
171 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 170 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
172 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 171 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
202 } | 201 } |
203 } | 202 } |
204 | 203 |
205 /* perform the butterfly for one stage of a mixed radix FFT */ | 204 /* perform the butterfly for one stage of a mixed radix FFT */ |
206 static void kf_bfly_generic( | 205 static void kf_bfly_generic( |
207 peck_fft_cpx * Fout, | 206 peck_fft_cpx * Fout, |
208 const size_t fstride, | 207 const size_t fstride, |
209 const peck_fft_cfg st, | 208 const peck_fft_cfg st, |
210 int m, | 209 int m, |
211 int p | 210 int p) { |
212 ) | 211 |
213 { | |
214 int u,k,q1,q; | 212 int u,k,q1,q; |
215 peck_fft_cpx * twiddles = st->twiddles; | 213 peck_fft_cpx * twiddles = st->twiddles; |
216 peck_fft_cpx t; | 214 peck_fft_cpx t; |
217 int Norig = st->nfft; | 215 int Norig = st->nfft; |
218 | 216 |
219 printf("kf_bfly_generic\n"); | 217 printf("kf_bfly_generic\n"); |
220 | |
221 | 218 |
222 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p); | 219 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p); |
223 | 220 |
224 for ( u=0; u<m; ++u ) { | 221 for ( u=0; u<m; ++u ) { |
225 k=u; | 222 k=u; |