peck_fft.c @ changeset 0:723f588b82ac

description: import
author:      Peter Meerwald <p.meerwald@bct-electronic.com>
date:        Fri, 16 Sep 2011 12:53:08 +0200
parents:     (none)
children:    cfec79393811
/*
Copyright (c) 2003-2010, Mark Borgerding

All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include "_peck_fft_guts.h"
/* The guts header contains all the multiplication and addition macros that are defined for
   fixed or floating point complex numbers. It also declares the kf_ internal functions.
*/

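/* For orientation, in the floating-point build those macros typically reduce to plain
   complex arithmetic, roughly:
       C_ADD(res,a,b)    res.r = a.r + b.r;          res.i = a.i + b.i
       C_SUB(res,a,b)    res.r = a.r - b.r;          res.i = a.i - b.i
       C_MUL(res,a,b)    res.r = a.r*b.r - a.i*b.i;  res.i = a.r*b.i + a.i*b.r
       C_FIXDIV(c,div)   no-op (only the fixed-point build scales to avoid overflow)
       S_MUL(a,b)        plain scalar multiply; HALF_OF(x) is x*0.5
   The authoritative definitions, including the fixed-point variants, are in
   _peck_fft_guts.h. */

/* Radix-2 butterfly: for each of the m output pairs, multiply the upper half by its
   twiddle factor, then write the sum into the lower half and the difference into the
   upper half. */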
static void kf_bfly2(
        peck_fft_cpx *Fout,
        const size_t fstride,
        const peck_fft_cfg st,
        int m)
{
    //printf("kf_bfly2\n");

    peck_fft_cpx *Fout2;
    peck_fft_cpx *tw1 = st->twiddles;
    peck_fft_cpx t;
    Fout2 = Fout + m;
    do {
        C_FIXDIV(*Fout, 2); C_FIXDIV(*Fout2, 2);

        C_MUL(t, *Fout2, *tw1);
        tw1 += fstride;
        C_SUB(*Fout2, *Fout, t);
        C_ADDTO(*Fout, t);
        ++Fout2;
        ++Fout;
    } while (--m);
}

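/* Radix-4 butterfly: merges four quarter-length sub-DFTs per step; the inverse
   transform differs only in the sign of the imaginary cross terms. */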
static void kf_bfly4(
        peck_fft_cpx *Fout,
        const size_t fstride,
        const peck_fft_cfg st,
        const size_t m)
{
    peck_fft_cpx *tw1, *tw2, *tw3;
    peck_fft_cpx scratch[6];
    size_t k = m;
    const size_t m2 = 2*m;
    const size_t m3 = 3*m;

    //printf("kf_bfly4\n");

    tw3 = tw2 = tw1 = st->twiddles;

    do {
        C_FIXDIV(*Fout, 4); C_FIXDIV(Fout[m], 4); C_FIXDIV(Fout[m2], 4); C_FIXDIV(Fout[m3], 4);

        C_MUL(scratch[0], Fout[m],  *tw1);
        C_MUL(scratch[1], Fout[m2], *tw2);
        C_MUL(scratch[2], Fout[m3], *tw3);

        C_SUB(scratch[5], *Fout, scratch[1]);
        C_ADDTO(*Fout, scratch[1]);
        C_ADD(scratch[3], scratch[0], scratch[2]);
        C_SUB(scratch[4], scratch[0], scratch[2]);
        C_SUB(Fout[m2], *Fout, scratch[3]);
        tw1 += fstride;
        tw2 += fstride*2;
        tw3 += fstride*3;
        C_ADDTO(*Fout, scratch[3]);

        if (st->inverse) {
            Fout[m].r  = scratch[5].r - scratch[4].i;
            Fout[m].i  = scratch[5].i + scratch[4].r;
            Fout[m3].r = scratch[5].r + scratch[4].i;
            Fout[m3].i = scratch[5].i - scratch[4].r;
        } else {
            Fout[m].r  = scratch[5].r + scratch[4].i;
            Fout[m].i  = scratch[5].i - scratch[4].r;
            Fout[m3].r = scratch[5].r - scratch[4].i;
            Fout[m3].i = scratch[5].i + scratch[4].r;
        }
        ++Fout;
    } while (--k);
}

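/* Radix-3 butterfly: merges three third-length sub-DFTs; epi3 is the twiddle
   exp(-j*2*pi/3) (conjugated for the inverse transform), of which only the
   imaginary part is needed below. */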
static void kf_bfly3(
        peck_fft_cpx *Fout,
        const size_t fstride,
        const peck_fft_cfg st,
        size_t m)
{
    size_t k = m;
    const size_t m2 = 2*m;
    peck_fft_cpx *tw1, *tw2;
    peck_fft_cpx scratch[5];
    peck_fft_cpx epi3;
    epi3 = st->twiddles[fstride*m];

    //printf("kf_bfly3\n");

    tw1 = tw2 = st->twiddles;

    do {
        C_FIXDIV(*Fout, 3); C_FIXDIV(Fout[m], 3); C_FIXDIV(Fout[m2], 3);

        C_MUL(scratch[1], Fout[m],  *tw1);
        C_MUL(scratch[2], Fout[m2], *tw2);

        C_ADD(scratch[3], scratch[1], scratch[2]);
        C_SUB(scratch[0], scratch[1], scratch[2]);
        tw1 += fstride;
        tw2 += fstride*2;

        Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
        Fout[m].i = Fout->i - HALF_OF(scratch[3].i);

        C_MULBYSCALAR(scratch[0], epi3.i);

        C_ADDTO(*Fout, scratch[3]);

        Fout[m2].r = Fout[m].r + scratch[0].i;
        Fout[m2].i = Fout[m].i - scratch[0].r;

        Fout[m].r -= scratch[0].i;
        Fout[m].i += scratch[0].r;

        ++Fout;
    } while (--k);
}

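/* Radix-5 butterfly: merges five fifth-length sub-DFTs; ya and yb are the twiddles
   exp(-j*2*pi/5) and exp(-j*4*pi/5) read from the precomputed table (conjugated for
   the inverse transform). */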
static void kf_bfly5(
        peck_fft_cpx *Fout,
        const size_t fstride,
        const peck_fft_cfg st,
        int m)
{
    peck_fft_cpx *Fout0, *Fout1, *Fout2, *Fout3, *Fout4;
    int u;
    peck_fft_cpx scratch[13];
    peck_fft_cpx *twiddles = st->twiddles;
    peck_fft_cpx *tw;
    peck_fft_cpx ya, yb;
    ya = twiddles[fstride*m];
    yb = twiddles[fstride*2*m];

    //printf("kf_bfly5\n");

    Fout0 = Fout;
    Fout1 = Fout0 + m;
    Fout2 = Fout0 + 2*m;
    Fout3 = Fout0 + 3*m;
    Fout4 = Fout0 + 4*m;

    tw = st->twiddles;
    for (u = 0; u < m; ++u) {
        C_FIXDIV(*Fout0, 5); C_FIXDIV(*Fout1, 5); C_FIXDIV(*Fout2, 5); C_FIXDIV(*Fout3, 5); C_FIXDIV(*Fout4, 5);
        scratch[0] = *Fout0;

        C_MUL(scratch[1], *Fout1, tw[u*fstride]);
        C_MUL(scratch[2], *Fout2, tw[2*u*fstride]);
        C_MUL(scratch[3], *Fout3, tw[3*u*fstride]);
        C_MUL(scratch[4], *Fout4, tw[4*u*fstride]);

        C_ADD(scratch[7],  scratch[1], scratch[4]);
        C_SUB(scratch[10], scratch[1], scratch[4]);
        C_ADD(scratch[8],  scratch[2], scratch[3]);
        C_SUB(scratch[9],  scratch[2], scratch[3]);

        Fout0->r += scratch[7].r + scratch[8].r;
        Fout0->i += scratch[7].i + scratch[8].i;

        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);

        scratch[6].r =  S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
        scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);

        C_SUB(*Fout1, scratch[5], scratch[6]);
        C_ADD(*Fout4, scratch[5], scratch[6]);

        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
        scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
        scratch[12].i =  S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);

        C_ADD(*Fout2, scratch[11], scratch[12]);
        C_SUB(*Fout3, scratch[11], scratch[12]);

        ++Fout0; ++Fout1; ++Fout2; ++Fout3; ++Fout4;
    }
}

/* Perform the butterfly for one stage of a mixed-radix FFT. */
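/* The length-p DFT below is evaluated directly from its definition, so each stage
   costs O(m*p*p) operations; large prime factors therefore take this slower path. */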
static void kf_bfly_generic(
        peck_fft_cpx *Fout,
        const size_t fstride,
        const peck_fft_cfg st,
        int m,
        int p)
{
    int u, k, q1, q;
    peck_fft_cpx *twiddles = st->twiddles;
    peck_fft_cpx t;
    int Norig = st->nfft;

    //printf("kf_bfly_generic\n");

    peck_fft_cpx *scratch = (peck_fft_cpx *)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p);

    for (u = 0; u < m; ++u) {
        k = u;
        for (q1 = 0; q1 < p; ++q1) {
            scratch[q1] = Fout[k];
            C_FIXDIV(scratch[q1], p);
            k += m;
        }

        k = u;
        for (q1 = 0; q1 < p; ++q1) {
            int twidx = 0;
            Fout[k] = scratch[0];
            for (q = 1; q < p; ++q) {
                twidx += fstride * k;
                if (twidx >= Norig) twidx -= Norig;
                C_MUL(t, scratch[q], twiddles[twidx]);
                C_ADDTO(Fout[k], t);
            }
            k += m;
        }
    }
    PECK_FFT_TMP_FREE(scratch);
}

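/* Recursive Cooley-Tukey driver: the length-(p*m) DFT (p and m taken from the factor
   list) is computed as p decimated sub-DFTs of length m, which are then merged with
   the matching radix-p butterfly above. */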
static void kf_work(
        peck_fft_cpx *Fout,
        const peck_fft_cpx *f,
        const size_t fstride,
        int *factors,
        const peck_fft_cfg st) {
    peck_fft_cpx *Fout_beg = Fout;
    const int p = *factors++; /* the radix */
    const int m = *factors++; /* stage's FFT length / p */
    const peck_fft_cpx *Fout_end = Fout + p*m;

    // printf("kf_work\n");

    if (m == 1) {
        do {
            *Fout = *f;
            f += fstride;
        } while (++Fout != Fout_end);
    } else {
        do {
            // recursive call:
            // DFT of size m*p performed by doing
            // p instances of smaller DFTs of size m,
            // each one takes a decimated version of the input
            kf_work(Fout, f, fstride*p, factors, st);
            f += fstride;
        } while ((Fout += m) != Fout_end);
    }

    Fout = Fout_beg;

    // recombine the p smaller DFTs
    switch (p) {
        case 2: kf_bfly2(Fout, fstride, st, m); break;
        case 3: kf_bfly3(Fout, fstride, st, m); break;
        case 4: kf_bfly4(Fout, fstride, st, m); break;
        case 5: kf_bfly5(Fout, fstride, st, m); break;
        default: kf_bfly_generic(Fout, fstride, st, m, p); break;
    }
}

/*
 * facbuf is populated by p1, m1, p2, m2, ...
 * where
 *   p[i] * m[i] = m[i-1]
 *   m0 = n
 */
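/* For example, kf_factor(120, facbuf) writes the pairs (4,30), (2,15), (3,5), (5,1),
   i.e. 120 = 4*30, 30 = 2*15, 15 = 3*5, 5 = 5*1. */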
static void kf_factor(int n, int *facbuf) {
    int p = 4;
    float floor_sqrt;
    floor_sqrt = floorf(sqrtf(n));

    /* factor out powers of 4, powers of 2, then any remaining primes */
    do {
        while (n % p) {
            switch (p) {
                case 4: p = 2; break;
                case 2: p = 3; break;
                default: p += 2; break;
            }
            if (p > floor_sqrt)
                p = n; /* no more factors, skip to end */
        }
        n /= p;
        *facbuf++ = p;
        *facbuf++ = n;
    } while (n > 1);
}

/*
 * User-callable function to allocate all necessary storage space for the FFT.
 * The return value is a contiguous block of memory, allocated with malloc. As such,
 * it can be freed with free(), rather than a peck_fft-specific function.
 */
peck_fft_cfg peck_fft_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) {
    peck_fft_cfg st = NULL;
    size_t memneeded = sizeof(struct peck_fft_state)
        + sizeof(peck_fft_cpx)*(nfft-1); /* twiddle factors */

    if (lenmem == NULL) {
        st = (peck_fft_cfg)PECK_FFT_MALLOC(memneeded);
    } else {
        if (mem != NULL && *lenmem >= memneeded)
            st = (peck_fft_cfg)mem;
        *lenmem = memneeded;
    }

    if (st) {
        int i;
        st->nfft = nfft;
        st->inverse = inverse_fft;

        for (i = 0; i < nfft; ++i) {
            const float pi = 3.14159265359f;
            float phase = -2*pi*i / nfft;
            if (st->inverse)
                phase *= -1;
            kf_cexp(st->twiddles+i, phase);
        }

        kf_factor(nfft, st->factors);
    }
    return st;
}

void peck_fft(peck_fft_cfg cfg, const peck_fft_cpx *fin, peck_fft_cpx *fout) {
    kf_work(fout, fin, 1, cfg->factors, cfg);
}

void peck_fft_cleanup(void) {
    /* nothing needed any more */
}

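/* Return the smallest n' >= n whose prime factors are all 2, 3, or 5, so that the
   transform never needs the generic butterfly (e.g. 121 -> 125). */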
int peck_fft_next_fast_size(int n) {
    while (1) {
        int m = n;
        while ((m % 2) == 0) m /= 2;
        while ((m % 3) == 0) m /= 3;
        while ((m % 5) == 0) m /= 5;
        if (m <= 1)
            break; /* n is completely factorable by twos, threes, and fives */
        n++;
    }
    return n;
}
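
/*
 * Minimal usage sketch (illustrative only, not part of the library; the transform
 * length and buffers are made up for the example):
 *
 *     peck_fft_cpx in[1024], out[1024];
 *     peck_fft_cfg cfg = peck_fft_alloc(1024, 0, NULL, NULL);   // 0 = forward transform
 *     // ... fill in[k].r / in[k].i with the input signal ...
 *     peck_fft(cfg, in, out);   // out[k] now holds the DFT of in[]
 *     free(cfg);                // the config is one malloc'd block, see peck_fft_alloc()
 */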