comparison peck_fft.c @ 0:723f588b82ac

import
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Fri, 16 Sep 2011 12:53:08 +0200
parents
children cfec79393811
comparison
equal deleted inserted replaced
-1:000000000000 0:723f588b82ac
1 /*
2 Copyright (c) 2003-2010, Mark Borgerding
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14
15
16 #include "_peck_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
19 */
20
21 static void kf_bfly2(
22 peck_fft_cpx * Fout,
23 const size_t fstride,
24 const peck_fft_cfg st,
25 int m
26 )
27 {
28
29 //printf("kf_bfly2\n");
30
31 peck_fft_cpx * Fout2;
32 peck_fft_cpx * tw1 = st->twiddles;
33 peck_fft_cpx t;
34 Fout2 = Fout + m;
35 do{
36 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
37
38 C_MUL (t, *Fout2 , *tw1);
39 tw1 += fstride;
40 C_SUB( *Fout2 , *Fout , t );
41 C_ADDTO( *Fout , t );
42 ++Fout2;
43 ++Fout;
44 }while (--m);
45 }
46
47 static void kf_bfly4(
48 peck_fft_cpx * Fout,
49 const size_t fstride,
50 const peck_fft_cfg st,
51 const size_t m
52 )
53 {
54 peck_fft_cpx *tw1,*tw2,*tw3;
55 peck_fft_cpx scratch[6];
56 size_t k=m;
57 const size_t m2=2*m;
58 const size_t m3=3*m;
59
60 //printf("kf_bfly4\n");
61
62
63 tw3 = tw2 = tw1 = st->twiddles;
64
65 do {
66 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
67
68 C_MUL(scratch[0],Fout[m] , *tw1 );
69 C_MUL(scratch[1],Fout[m2] , *tw2 );
70 C_MUL(scratch[2],Fout[m3] , *tw3 );
71
72 C_SUB( scratch[5] , *Fout, scratch[1] );
73 C_ADDTO(*Fout, scratch[1]);
74 C_ADD( scratch[3] , scratch[0] , scratch[2] );
75 C_SUB( scratch[4] , scratch[0] , scratch[2] );
76 C_SUB( Fout[m2], *Fout, scratch[3] );
77 tw1 += fstride;
78 tw2 += fstride*2;
79 tw3 += fstride*3;
80 C_ADDTO( *Fout , scratch[3] );
81
82 if(st->inverse) {
83 Fout[m].r = scratch[5].r - scratch[4].i;
84 Fout[m].i = scratch[5].i + scratch[4].r;
85 Fout[m3].r = scratch[5].r + scratch[4].i;
86 Fout[m3].i = scratch[5].i - scratch[4].r;
87 }else{
88 Fout[m].r = scratch[5].r + scratch[4].i;
89 Fout[m].i = scratch[5].i - scratch[4].r;
90 Fout[m3].r = scratch[5].r - scratch[4].i;
91 Fout[m3].i = scratch[5].i + scratch[4].r;
92 }
93 ++Fout;
94 }while(--k);
95 }
96
97 static void kf_bfly3(
98 peck_fft_cpx * Fout,
99 const size_t fstride,
100 const peck_fft_cfg st,
101 size_t m
102 )
103 {
104 size_t k=m;
105 const size_t m2 = 2*m;
106 peck_fft_cpx *tw1,*tw2;
107 peck_fft_cpx scratch[5];
108 peck_fft_cpx epi3;
109 epi3 = st->twiddles[fstride*m];
110
111 printf("kf_bfly3\n");
112
113
114 tw1=tw2=st->twiddles;
115
116 do{
117 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
118
119 C_MUL(scratch[1],Fout[m] , *tw1);
120 C_MUL(scratch[2],Fout[m2] , *tw2);
121
122 C_ADD(scratch[3],scratch[1],scratch[2]);
123 C_SUB(scratch[0],scratch[1],scratch[2]);
124 tw1 += fstride;
125 tw2 += fstride*2;
126
127 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
128 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
129
130 C_MULBYSCALAR( scratch[0] , epi3.i );
131
132 C_ADDTO(*Fout,scratch[3]);
133
134 Fout[m2].r = Fout[m].r + scratch[0].i;
135 Fout[m2].i = Fout[m].i - scratch[0].r;
136
137 Fout[m].r -= scratch[0].i;
138 Fout[m].i += scratch[0].r;
139
140 ++Fout;
141 }while(--k);
142 }
143
144 static void kf_bfly5(
145 peck_fft_cpx * Fout,
146 const size_t fstride,
147 const peck_fft_cfg st,
148 int m
149 )
150 {
151 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
152 int u;
153 peck_fft_cpx scratch[13];
154 peck_fft_cpx * twiddles = st->twiddles;
155 peck_fft_cpx *tw;
156 peck_fft_cpx ya,yb;
157 ya = twiddles[fstride*m];
158 yb = twiddles[fstride*2*m];
159
160 printf("kf_bfly5\n");
161
162
163 Fout0=Fout;
164 Fout1=Fout0+m;
165 Fout2=Fout0+2*m;
166 Fout3=Fout0+3*m;
167 Fout4=Fout0+4*m;
168
169 tw=st->twiddles;
170 for ( u=0; u<m; ++u ) {
171 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
172 scratch[0] = *Fout0;
173
174 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
175 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
176 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
177 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
178
179 C_ADD( scratch[7],scratch[1],scratch[4]);
180 C_SUB( scratch[10],scratch[1],scratch[4]);
181 C_ADD( scratch[8],scratch[2],scratch[3]);
182 C_SUB( scratch[9],scratch[2],scratch[3]);
183
184 Fout0->r += scratch[7].r + scratch[8].r;
185 Fout0->i += scratch[7].i + scratch[8].i;
186
187 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
188 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
189
190 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
191 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
192
193 C_SUB(*Fout1,scratch[5],scratch[6]);
194 C_ADD(*Fout4,scratch[5],scratch[6]);
195
196 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
197 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
198 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
199 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
200
201 C_ADD(*Fout2,scratch[11],scratch[12]);
202 C_SUB(*Fout3,scratch[11],scratch[12]);
203
204 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
205 }
206 }
207
208 /* perform the butterfly for one stage of a mixed radix FFT */
209 static void kf_bfly_generic(
210 peck_fft_cpx * Fout,
211 const size_t fstride,
212 const peck_fft_cfg st,
213 int m,
214 int p
215 )
216 {
217 int u,k,q1,q;
218 peck_fft_cpx * twiddles = st->twiddles;
219 peck_fft_cpx t;
220 int Norig = st->nfft;
221
222 printf("kf_bfly_generic\n");
223
224
225 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p);
226
227 for ( u=0; u<m; ++u ) {
228 k=u;
229 for (q1 = 0; q1 < p; ++q1) {
230 scratch[q1] = Fout[k];
231 C_FIXDIV(scratch[q1], p);
232 k += m;
233 }
234
235 k=u;
236 for ( q1=0 ; q1<p ; ++q1 ) {
237 int twidx=0;
238 Fout[ k ] = scratch[0];
239 for (q=1;q<p;++q ) {
240 twidx += fstride * k;
241 if (twidx>=Norig) twidx-=Norig;
242 C_MUL(t,scratch[q] , twiddles[twidx] );
243 C_ADDTO( Fout[ k ] ,t);
244 }
245 k += m;
246 }
247 }
248 PECK_FFT_TMP_FREE(scratch);
249 }
250
251 static void kf_work(
252 peck_fft_cpx * Fout,
253 const peck_fft_cpx * f,
254 const size_t fstride,
255 int *factors,
256 const peck_fft_cfg st) {
257 peck_fft_cpx *Fout_beg = Fout;
258 const int p = *factors++; /* the radix */
259 const int m = *factors++; /* stage's FFT length / p */
260 const peck_fft_cpx *Fout_end = Fout + p*m;
261
262 // printf("kf_work\n");
263
264 if (m == 1) {
265 do {
266 *Fout = *f;
267 f += fstride;
268 } while (++Fout != Fout_end);
269 } else {
270 do {
271 // recursive call:
272 // DFT of size m*p performed by doing
273 // p instances of smaller DFTs of size m,
274 // each one takes a decimated version of the input
275 kf_work(Fout, f, fstride*p, factors, st);
276 f += fstride;
277 } while ((Fout += m) != Fout_end);
278 }
279
280 Fout=Fout_beg;
281
282 // recombine the p smaller DFTs
283 switch (p) {
284 case 2: kf_bfly2(Fout, fstride, st, m); break;
285 case 3: kf_bfly3(Fout, fstride, st, m); break;
286 case 4: kf_bfly4(Fout, fstride, st, m); break;
287 case 5: kf_bfly5(Fout, fstride, st, m); break;
288 default: kf_bfly_generic(Fout, fstride, st, m, p); break;
289 }
290 }
291
292 /*
293 * facbuf is populated by p1, m1, p2, m2, ...
294 * where
295 * p[i] * m[i] = m[i-1]
296 * m0 = n
297 */
298 static void kf_factor(int n, int * facbuf) {
299 int p = 4;
300 float floor_sqrt;
301 floor_sqrt = floorf(sqrtf(n));
302
303 /* factor out powers of 4, powers of 2, then any remaining primes */
304 do {
305 while (n % p) {
306 switch (p) {
307 case 4: p = 2; break;
308 case 2: p = 3; break;
309 default: p += 2; break;
310 }
311 if (p > floor_sqrt)
312 p = n; /* no more factors, skip to end */
313 }
314 n /= p;
315 *facbuf++ = p;
316 *facbuf++ = n;
317 } while (n > 1);
318 }
319
320 /*
321 * User-callable function to allocate all necessary storage space for the fft.
322 * The return value is a contiguous block of memory, allocated with malloc. As such,
323 * it can be freed with free(), rather than a peck_fft-specific function.
324 */
325 peck_fft_cfg peck_fft_alloc(int nfft, int inverse_fft, void * mem, size_t * lenmem) {
326 peck_fft_cfg st = NULL;
327 size_t memneeded = sizeof(struct peck_fft_state)
328 + sizeof(peck_fft_cpx)*(nfft-1); /* twiddle factors */
329
330 if (lenmem == NULL) {
331 st = ( peck_fft_cfg)PECK_FFT_MALLOC(memneeded);
332 } else {
333 if (mem != NULL && *lenmem >= memneeded)
334 st = (peck_fft_cfg)mem;
335 *lenmem = memneeded;
336 }
337
338 if (st) {
339 int i;
340 st->nfft=nfft;
341 st->inverse = inverse_fft;
342
343 for (i = 0; i < nfft; ++i) {
344 const float pi = 3.14159265359f;
345 float phase = -2*pi*i / nfft;
346 if (st->inverse)
347 phase *= -1;
348 kf_cexp(st->twiddles+i, phase);
349 }
350
351 kf_factor(nfft, st->factors);
352 }
353 return st;
354 }
355
356 void peck_fft(peck_fft_cfg cfg, const peck_fft_cpx *fin, peck_fft_cpx *fout) {
357 kf_work(fout, fin, 1, cfg->factors, cfg);
358 }
359
360 void peck_fft_cleanup(void) {
361 /* nothing needed any more */
362 }
363
364 int peck_fft_next_fast_size(int n) {
365 while (1) {
366 int m = n;
367 while ((m % 2) == 0) m /= 2;
368 while ((m % 3) == 0) m /= 3;
369 while ((m % 5) == 0) m /= 5;
370 if (m <= 1)
371 break; /* n is completely factorable by twos, threes, and fives */
372 n++;
373 }
374 return n;
375 }

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.