comparison peck_fft.c @ 4:2d6c49fcafcb

neon2 and neon4 support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Fri, 16 Sep 2011 14:04:19 +0200
parents cfec79393811
children c7237a7544eb
comparison
equal deleted inserted replaced
3:3b31bd44a09f 4:2d6c49fcafcb
17 /* The guts header contains all the multiplication and addition macros that are defined for 17 /* The guts header contains all the multiplication and addition macros that are defined for
18 * fixed or floating point complex numbers. It also declares the kf_ internal functions. 18 * fixed or floating point complex numbers. It also declares the kf_ internal functions.
19 */ 19 */
20 20
21 static void kf_bfly2( 21 static void kf_bfly2(
22 peck_fft_cpx * Fout, 22 peck_fft_cpx * Fout,
23 const size_t fstride, 23 const size_t fstride,
24 const peck_fft_cfg st, 24 const peck_fft_cfg st,
25 int m) { 25 int m) {
26 26
27 //printf("kf_bfly2\n"); 27 //printf("kf_bfly2\n");
28 28
29 peck_fft_cpx * Fout2; 29 peck_fft_cpx * Fout2;
30 peck_fft_cpx * tw1 = st->twiddles; 30 peck_fft_cpx * tw1 = st->twiddles;
42 ++Fout; 42 ++Fout;
43 } while (--m); 43 } while (--m);
44 } 44 }
45 45
46 static void kf_bfly4( 46 static void kf_bfly4(
47 peck_fft_cpx * Fout, 47 peck_fft_cpx * Fout,
48 const size_t fstride, 48 const size_t fstride,
49 const peck_fft_cfg st, 49 const peck_fft_cfg st,
50 const size_t m) { 50 const size_t m) {
51
51 peck_fft_cpx *tw1,*tw2,*tw3; 52 peck_fft_cpx *tw1,*tw2,*tw3;
52 peck_fft_cpx scratch[6]; 53 peck_fft_cpx scratch[6];
53 size_t k=m; 54 size_t k=m;
54 const size_t m2=2*m; 55 const size_t m2=2*m;
55 const size_t m3=3*m; 56 const size_t m3=3*m;
92 ++Fout; 93 ++Fout;
93 } while (--k); 94 } while (--k);
94 } 95 }
95 96
96 static void kf_bfly3( 97 static void kf_bfly3(
97 peck_fft_cpx * Fout, 98 peck_fft_cpx * Fout,
98 const size_t fstride, 99 const size_t fstride,
99 const peck_fft_cfg st, 100 const peck_fft_cfg st,
100 size_t m) { 101 size_t m) {
102
101 size_t k=m; 103 size_t k=m;
102 const size_t m2 = 2*m; 104 const size_t m2 = 2*m;
103 peck_fft_cpx *tw1, *tw2; 105 peck_fft_cpx *tw1, *tw2;
104 peck_fft_cpx scratch[5]; 106 peck_fft_cpx scratch[5];
105 peck_fft_cpx epi3; 107 peck_fft_cpx epi3;
106 epi3 = st->twiddles[fstride*m]; 108 epi3 = st->twiddles[fstride*m];
107 109
108 printf("kf_bfly3\n"); 110 printf("kf_bfly3\n");
109 111
110
111 tw1=tw2=st->twiddles; 112 tw1=tw2=st->twiddles;
112 113
113 do { 114 do {
114 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 115 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
115 116
137 ++Fout; 138 ++Fout;
138 } while (--k); 139 } while (--k);
139 } 140 }
140 141
141 static void kf_bfly5( 142 static void kf_bfly5(
142 peck_fft_cpx * Fout, 143 peck_fft_cpx * Fout,
143 const size_t fstride, 144 const size_t fstride,
144 const peck_fft_cfg st, 145 const peck_fft_cfg st,
145 int m 146 int m
146 ) 147 ) {
147 {
148 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 148 peck_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
149 int u; 149 int u;
150 peck_fft_cpx scratch[13]; 150 peck_fft_cpx scratch[13];
151 peck_fft_cpx * twiddles = st->twiddles; 151 peck_fft_cpx * twiddles = st->twiddles;
152 peck_fft_cpx *tw; 152 peck_fft_cpx *tw;
154 ya = twiddles[fstride*m]; 154 ya = twiddles[fstride*m];
155 yb = twiddles[fstride*2*m]; 155 yb = twiddles[fstride*2*m];
156 156
157 printf("kf_bfly5\n"); 157 printf("kf_bfly5\n");
158 158
159
160 Fout0=Fout; 159 Fout0=Fout;
161 Fout1=Fout0+m; 160 Fout1=Fout0+m;
162 Fout2=Fout0+2*m; 161 Fout2=Fout0+2*m;
163 Fout3=Fout0+3*m; 162 Fout3=Fout0+3*m;
164 Fout4=Fout0+4*m; 163 Fout4=Fout0+4*m;
165 164
166 tw=st->twiddles; 165 tw=st->twiddles;
167 for ( u=0; u<m; ++u ) { 166 for (u = 0; u < m; ++u) {
168 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 167 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
169 scratch[0] = *Fout0; 168 scratch[0] = *Fout0;
170 169
171 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 170 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
172 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 171 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
202 } 201 }
203 } 202 }
204 203
205 /* perform the butterfly for one stage of a mixed radix FFT */ 204 /* perform the butterfly for one stage of a mixed radix FFT */
206 static void kf_bfly_generic( 205 static void kf_bfly_generic(
207 peck_fft_cpx * Fout, 206 peck_fft_cpx * Fout,
208 const size_t fstride, 207 const size_t fstride,
209 const peck_fft_cfg st, 208 const peck_fft_cfg st,
210 int m, 209 int m,
211 int p 210 int p) {
212 ) 211
213 {
214 int u,k,q1,q; 212 int u,k,q1,q;
215 peck_fft_cpx * twiddles = st->twiddles; 213 peck_fft_cpx * twiddles = st->twiddles;
216 peck_fft_cpx t; 214 peck_fft_cpx t;
217 int Norig = st->nfft; 215 int Norig = st->nfft;
218 216
219 printf("kf_bfly_generic\n"); 217 printf("kf_bfly_generic\n");
220
221 218
222 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p); 219 peck_fft_cpx * scratch = (peck_fft_cpx*)PECK_FFT_TMP_ALLOC(sizeof(peck_fft_cpx)*p);
223 220
224 for ( u=0; u<m; ++u ) { 221 for ( u=0; u<m; ++u ) {
225 k=u; 222 k=u;

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.