Mercurial > hg > peckfft
diff peck_fft.c @ 10:05f6ab0a17c0
backup
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Wed, 21 Sep 2011 15:20:58 +0200 |
parents | 8726585681f6 |
children | abdcde012978 |
line wrap: on
line diff
--- a/peck_fft.c	Wed Sep 21 12:18:40 2011 +0200
+++ b/peck_fft.c	Wed Sep 21 15:20:58 2011 +0200
@@ -19,7 +19,7 @@
  */
 #if !BFLY2_ASM
 static void kf_bfly2(
-        peck_fft_cpx *Fout,
+        peck_fft_cpx * __restrict Fout,
         const size_t fstride,
         const peck_fft_cfg st,
         int m) {
@@ -40,51 +40,77 @@
 }
 #endif
 
+#if !BFLY4_ASM
 static void kf_bfly4(
-        peck_fft_cpx * Fout,
+        peck_fft_cpx * __restrict Fout,
         const size_t fstride,
         const peck_fft_cfg st,
         const size_t m) {
-    peck_fft_cpx *tw1,*tw2,*tw3;
-    peck_fft_cpx scratch[6];
-    size_t k=m;
-    const size_t m2=2*m;
-    const size_t m3=3*m;
+    peck_fft_cpx scratch[4];
+    peck_fft_cpx * __restrict tw1, * __restrict tw2, * __restrict tw3;
+    size_t k = m;
+    const size_t m2 = 2*m;
+    const size_t m3 = 3*m;
 
 // printf("kf_bfly4, %d\n", fstride);
 
 
     tw3 = tw2 = tw1 = st->twiddles;
 
-    do {
-        C_MUL(scratch[0], Fout[m], *tw1);
-        C_MUL(scratch[1], Fout[m2], *tw2);
-        C_MUL(scratch[2], Fout[m3], *tw3);
+    if (st->inverse) {
+        do {
+            C_MUL(scratch[0], Fout[m], *tw1);
+            C_MUL(scratch[3], Fout[m2], *tw2);
+            C_MUL(scratch[2], Fout[m3], *tw3);
 
-        C_SUB(scratch[5], *Fout, scratch[1]);
-        C_ADDTO(*Fout, scratch[1]);
-        C_ADD(scratch[3], scratch[0], scratch[2]);
-        C_SUB(scratch[4], scratch[0], scratch[2]);
-        C_SUB(Fout[m2], *Fout, scratch[3]);
-        tw1 += fstride;
-        tw2 += fstride*2;
-        tw3 += fstride*3;
-        C_ADDTO(*Fout, scratch[3]);
+            C_SUB(scratch[1], *Fout, scratch[3]);
+            C_ADDTO(*Fout, scratch[3]);
+
+            C_ADD(scratch[3], scratch[0], scratch[2]);
+            C_SUB(Fout[m2], *Fout, scratch[3]);
+            C_ADDTO(*Fout, scratch[3]);
+
+            tw1 += fstride;
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+
+            C_SUB(scratch[3], scratch[0], scratch[2]);
+            Fout[m].r = scratch[1].r - scratch[3].i;
+            Fout[m].i = scratch[1].i + scratch[3].r;
+            Fout[m3].r = scratch[1].r + scratch[3].i;
+            Fout[m3].i = scratch[1].i - scratch[3].r;
 
-        if (st->inverse) {
-            Fout[m].r = scratch[5].r - scratch[4].i;
-            Fout[m].i = scratch[5].i + scratch[4].r;
-            Fout[m3].r = scratch[5].r + scratch[4].i;
-            Fout[m3].i = scratch[5].i - scratch[4].r;
-        } else {
-            Fout[m].r = scratch[5].r + scratch[4].i;
-            Fout[m].i = scratch[5].i - scratch[4].r;
-            Fout[m3].r = scratch[5].r - scratch[4].i;
-            Fout[m3].i = scratch[5].i + scratch[4].r;
-        }
-        ++Fout;
-    } while (--k);
+            ++Fout;
+        } while (--k);
+    }
+    else {
+        do {
+            C_MUL(scratch[0], Fout[m], *tw1);
+            C_MUL(scratch[3], Fout[m2], *tw2);
+            C_MUL(scratch[2], Fout[m3], *tw3);
+
+            C_SUB(scratch[1], *Fout, scratch[3]);
+            C_ADDTO(*Fout, scratch[3]);
+
+            C_ADD(scratch[3], scratch[0], scratch[2]);
+            C_SUB(Fout[m2], *Fout, scratch[3]);
+            C_ADDTO(*Fout, scratch[3]);
+
+            tw1 += fstride;
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+
+            C_SUB(scratch[3], scratch[0], scratch[2]);
+            Fout[m].r = scratch[1].r + scratch[3].i;
+            Fout[m].i = scratch[1].i - scratch[3].r;
+            Fout[m3].r = scratch[1].r - scratch[3].i;
+            Fout[m3].i = scratch[1].i + scratch[3].r;
+
+            ++Fout;
+        } while (--k);
+    }
 }
+#endif
 
 static void kf_bfly3(
         peck_fft_cpx * Fout,