Mercurial > hg > peckfft
comparison peck_fft.c @ 4:2d6c49fcafcb
neon2 and neon4 support
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Fri, 16 Sep 2011 14:04:19 +0200 |
| parents | cfec79393811 |
| children | c7237a7544eb |
Comparison legend: equal, deleted, inserted, replaced
| 3:3b31bd44a09f | 4:2d6c49fcafcb |
|---|---|
| 17 /* The guts header contains all the multiplication and addition macros that are defined for | 17 /* The guts header contains all the multiplication and addition macros that are defined for |
| 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. | 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. |
| 19 */ | 19 */ |
| 20 | 20 |
| 21 static void kf_bfly2( | 21 static void kf_bfly2( |
| 22 peck_fft_cpx * Fout, | 22 peck_fft_cpx * Fout, |
| 23 const size_t fstride, | 23 const size_t fstride, |
| 24 const peck_fft_cfg st, | 24 const peck_fft_cfg st, |
| 25 int m) { | 25 int m) { |
| 26 | 26 |
| 27 //printf("kf_bfly2\n"); | 27 //printf("kf_bfly2\n"); |
| 28 | 28 |
| 29 peck_fft_cpx * Fout2; | 29 peck_fft_cpx * Fout2; |
| 30 peck_fft_cpx * tw1 = st->twiddles; | 30 peck_fft_cpx * tw1 = st->twiddles; |
| 42 ++Fout; | 42 ++Fout; |
| 43 } while (--m); | 43 } while (--m); |
| 44 } | 44 } |
| 45 | 45 |
| 46 static void kf_bfly4( | 46 static void kf_bfly4( |
| 47 peck_fft_cpx * Fout, | 47 peck_fft_cpx * Fout, |
| 48 const size_t fstride, | 48 const size_t fstride, |
| 49 const peck_fft_cfg st, | 49 const peck_fft_cfg st, |
| 50 const size_t m) { | 50 const size_t m) { |
| 51 | |
| 51 peck_fft_cpx *tw1,*tw2,*tw3; | 52 peck_fft_cpx *tw1,*tw2,*tw3; |
| 52 peck_fft_cpx scratch[6]; | 53 peck_fft_cpx scratch[6]; |
| 53 size_t k=m; | 54 size_t k=m; |
| 54 const size_t m2=2*m; | 55 const size_t m2=2*m; |
| 55 const size_t m3=3*m; | 56 const size_t m3=3*m; |
| 92 ++Fout; | 93 ++Fout; |
| 93 } while (--k); | 94 } while (--k); |
| 94 } | 95 } |
| 95 | 96 |
| 96 static void kf_bfly3( | 97 static void kf_bfly3( |
| 97 peck_fft_cpx * Fout, | 98 peck_fft_cpx * Fout, |
| 98 const size_t fstride, | 99 const size_t fstride, |
| 99 const peck_fft_cfg st, | 100 const peck_fft_cfg st, |
| 100 size_t m) { | 101 size_t m) { |
| 102 | |
| 101 size_t k=m; | 103 size_t k=m; |
| 102 const size_t m2 = 2*m; | 104 const size_t m2 = 2*m; |
| 103 peck_fft_cpx *tw1, *tw2; | 105 peck_fft_cpx *tw1, *tw2; |
| 104 peck_fft_cpx scratch[5]; | 106 peck_fft_cpx scratch[5]; |
| 105 peck_fft_cpx epi3; | 107 peck_fft_cpx epi3; |
| 106 epi3 = st->twiddles[fstride*m]; | 108 epi3 = st->twiddles[fstride*m]; |
| 107 | 109 |
| 108 printf("kf_bfly3\n"); | 110 printf("kf_bfly3\n"); |
| 109 | 111 |
| 110 | |
| 111 tw1=tw2=st->twiddles; | 112 tw1=tw2=st->twiddles; |
| 112 | 113 |
| 113 do { | 114 do { |
| 114 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); | 115 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); |
| 115 | 116 |
| 137 ++Fout; | 138 ++Fout; |
| 138 } while (--k); | 139 } while (--k); |
| 139 } | 140 } |
| 140 | 141 |
| 141 static void kf_bfly5( | 142 static void kf_bfly5( |
| 142 peck_fft_cpx * Fout, | 143 peck_fft_cpx * Fout, |
| 143 const size_t fstride, | 144 const size_t fstride, |
| 144 const peck_fft_cfg st, | 145 const peck_fft_cfg st, |
| 145 int m | 146 int m |
| 146 ) | 147 ) { |
| 147 { | |
| 148 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 148 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
| 149 int u; | 149 int u; |
| 150 peck_fft_cpx scratch[13]; | 150 peck_fft_cpx scratch[13]; |
| 151 peck_fft_cpx * twiddles = st->twiddles; | 151 peck_fft_cpx * twiddles = st->twiddles; |
| 152 peck_fft_cpx *tw; | 152 peck_fft_cpx *tw; |
| 154 ya = twiddles[fstride*m]; | 154 ya = twiddles[fstride*m]; |
| 155 yb = twiddles[fstride*2*m]; | 155 yb = twiddles[fstride*2*m]; |
| 156 | 156 |
| 157 printf("kf_bfly5\n"); | 157 printf("kf_bfly5\n"); |
| 158 | 158 |
| 159 | |
| 160 Fout0=Fout; | 159 Fout0=Fout; |
| 161 Fout1=Fout0+m; | 160 Fout1=Fout0+m; |
| 162 Fout2=Fout0+2*m; | 161 Fout2=Fout0+2*m; |
| 163 Fout3=Fout0+3*m; | 162 Fout3=Fout0+3*m; |
| 164 Fout4=Fout0+4*m; | 163 Fout4=Fout0+4*m; |
| 165 | 164 |
| 166 tw=st->twiddles; | 165 tw=st->twiddles; |
| 167 for ( u=0; u<m; ++u ) { | 166 for (u = 0; u < m; ++u) { |
| 168 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); | 167 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); |
| 169 scratch[0] = *Fout0; | 168 scratch[0] = *Fout0; |
| 170 | 169 |
| 171 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 170 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
| 172 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 171 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
| 202 } | 201 } |
| 203 } | 202 } |
| 204 | 203 |
| 205 /* perform the butterfly for one stage of a mixed radix FFT */ | 204 /* perform the butterfly for one stage of a mixed radix FFT */ |
| 206 static void kf_bfly_generic( | 205 static void kf_bfly_generic( |
| 207 peck_fft_cpx * Fout, | 206 peck_fft_cpx * Fout, |
| 208 const size_t fstride, | 207 const size_t fstride, |
| 209 const peck_fft_cfg st, | 208 const peck_fft_cfg st, |
| 210 int m, | 209 int m, |
| 211 int p | 210 int p) { |
| 212 ) | 211 |
| 213 { | |
| 214 int u,k,q1,q; | 212 int u,k,q1,q; |
| 215 peck_fft_cpx * twiddles = st->twiddles; | 213 peck_fft_cpx * twiddles = st->twiddles; |
| 216 peck_fft_cpx t; | 214 peck_fft_cpx t; |
| 217 int Norig = st->nfft; | 215 int Norig = st->nfft; |
| 218 | 216 |
| 219 printf("kf_bfly_generic\n"); | 217 printf("kf_bfly_generic\n"); |
| 220 | |
| 221 | 218 |
| 222 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p); | 219 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p); |
| 223 | 220 |
| 224 for ( u=0; u<m; ++u ) { | 221 for ( u=0; u<m; ++u ) { |
| 225 k=u; | 222 k=u; |
