Mercurial > hg > peckfft
comparison peck_fft.c @ 10:05f6ab0a17c0
backup
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Wed, 21 Sep 2011 15:20:58 +0200 |
| parents | 8726585681f6 |
| children | abdcde012978 |
comparison
equal
deleted
inserted
replaced
| 9:8726585681f6 | 10:05f6ab0a17c0 |
|---|---|
| 17 /* The guts header contains all the multiplication and addition macros that are defined for | 17 /* The guts header contains all the multiplication and addition macros that are defined for |
| 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. | 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. |
| 19 */ | 19 */ |
| 20 #if !BFLY2_ASM | 20 #if !BFLY2_ASM |
| 21 static void kf_bfly2( | 21 static void kf_bfly2( |
| 22 peck_fft_cpx *Fout, | 22 peck_fft_cpx * __restrict Fout, |
| 23 const size_t fstride, | 23 const size_t fstride, |
| 24 const peck_fft_cfg st, | 24 const peck_fft_cfg st, |
| 25 int m) { | 25 int m) { |
| 26 | 26 |
| 27 // printf("kf_bfly2, %d\n", fstride); | 27 // printf("kf_bfly2, %d\n", fstride); |
| 38 ++Fout; | 38 ++Fout; |
| 39 } while (--m); | 39 } while (--m); |
| 40 } | 40 } |
| 41 #endif | 41 #endif |
| 42 | 42 |
| 43 #if !BFLY4_ASM | |
| 43 static void kf_bfly4( | 44 static void kf_bfly4( |
| 44 peck_fft_cpx * Fout, | 45 peck_fft_cpx * __restrict Fout, |
| 45 const size_t fstride, | 46 const size_t fstride, |
| 46 const peck_fft_cfg st, | 47 const peck_fft_cfg st, |
| 47 const size_t m) { | 48 const size_t m) { |
| 48 | 49 |
| 49 peck_fft_cpx *tw1,*tw2,*tw3; | 50 peck_fft_cpx scratch[4]; |
| 50 peck_fft_cpx scratch[6]; | 51 peck_fft_cpx * __restrict tw1, * __restrict tw2, * __restrict tw3; |
| 51 size_t k=m; | 52 size_t k = m; |
| 52 const size_t m2=2*m; | 53 const size_t m2 = 2*m; |
| 53 const size_t m3=3*m; | 54 const size_t m3 = 3*m; |
| 54 | 55 |
| 55 // printf("kf_bfly4, %d\n", fstride); | 56 // printf("kf_bfly4, %d\n", fstride); |
| 56 | 57 |
| 57 tw3 = tw2 = tw1 = st->twiddles; | 58 tw3 = tw2 = tw1 = st->twiddles; |
| 58 | 59 |
| 59 do { | 60 if (st->inverse) { |
| 60 C_MUL(scratch[0], Fout[m], *tw1); | 61 do { |
| 61 C_MUL(scratch[1], Fout[m2], *tw2); | 62 C_MUL(scratch[0], Fout[m], *tw1); |
| 62 C_MUL(scratch[2], Fout[m3], *tw3); | 63 C_MUL(scratch[3], Fout[m2], *tw2); |
| 63 | 64 C_MUL(scratch[2], Fout[m3], *tw3); |
| 64 C_SUB(scratch[5], *Fout, scratch[1]); | 65 |
| 65 C_ADDTO(*Fout, scratch[1]); | 66 C_SUB(scratch[1], *Fout, scratch[3]); |
| 66 C_ADD(scratch[3], scratch[0], scratch[2]); | 67 C_ADDTO(*Fout, scratch[3]); |
| 67 C_SUB(scratch[4], scratch[0], scratch[2]); | 68 |
| 68 C_SUB(Fout[m2], *Fout, scratch[3]); | 69 C_ADD(scratch[3], scratch[0], scratch[2]); |
| 69 tw1 += fstride; | 70 C_SUB(Fout[m2], *Fout, scratch[3]); |
| 70 tw2 += fstride*2; | 71 C_ADDTO(*Fout, scratch[3]); |
| 71 tw3 += fstride*3; | 72 |
| 72 C_ADDTO(*Fout, scratch[3]); | 73 tw1 += fstride; |
| 73 | 74 tw2 += fstride*2; |
| 74 if (st->inverse) { | 75 tw3 += fstride*3; |
| 75 Fout[m].r = scratch[5].r - scratch[4].i; | 76 |
| 76 Fout[m].i = scratch[5].i + scratch[4].r; | 77 C_SUB(scratch[3], scratch[0], scratch[2]); |
| 77 Fout[m3].r = scratch[5].r + scratch[4].i; | 78 Fout[m].r = scratch[1].r - scratch[3].i; |
| 78 Fout[m3].i = scratch[5].i - scratch[4].r; | 79 Fout[m].i = scratch[1].i + scratch[3].r; |
| 79 } else { | 80 Fout[m3].r = scratch[1].r + scratch[3].i; |
| 80 Fout[m].r = scratch[5].r + scratch[4].i; | 81 Fout[m3].i = scratch[1].i - scratch[3].r; |
| 81 Fout[m].i = scratch[5].i - scratch[4].r; | 82 |
| 82 Fout[m3].r = scratch[5].r - scratch[4].i; | 83 ++Fout; |
| 83 Fout[m3].i = scratch[5].i + scratch[4].r; | 84 } while (--k); |
| 84 } | 85 } |
| 85 ++Fout; | 86 else { |
| 86 } while (--k); | 87 do { |
| 87 } | 88 C_MUL(scratch[0], Fout[m], *tw1); |
| 89 C_MUL(scratch[3], Fout[m2], *tw2); | |
| 90 C_MUL(scratch[2], Fout[m3], *tw3); | |
| 91 | |
| 92 C_SUB(scratch[1], *Fout, scratch[3]); | |
| 93 C_ADDTO(*Fout, scratch[3]); | |
| 94 | |
| 95 C_ADD(scratch[3], scratch[0], scratch[2]); | |
| 96 C_SUB(Fout[m2], *Fout, scratch[3]); | |
| 97 C_ADDTO(*Fout, scratch[3]); | |
| 98 | |
| 99 tw1 += fstride; | |
| 100 tw2 += fstride*2; | |
| 101 tw3 += fstride*3; | |
| 102 | |
| 103 C_SUB(scratch[3], scratch[0], scratch[2]); | |
| 104 Fout[m].r = scratch[1].r + scratch[3].i; | |
| 105 Fout[m].i = scratch[1].i - scratch[3].r; | |
| 106 Fout[m3].r = scratch[1].r - scratch[3].i; | |
| 107 Fout[m3].i = scratch[1].i + scratch[3].r; | |
| 108 | |
| 109 ++Fout; | |
| 110 } while (--k); | |
| 111 } | |
| 112 } | |
| 113 #endif | |
| 88 | 114 |
| 89 static void kf_bfly3( | 115 static void kf_bfly3( |
| 90 peck_fft_cpx * Fout, | 116 peck_fft_cpx * Fout, |
| 91 const size_t fstride, | 117 const size_t fstride, |
| 92 const peck_fft_cfg st, | 118 const peck_fft_cfg st, |
