Mercurial > hg > peckfft
comparison peck_fftr.c @ 4:2d6c49fcafcb
neon2 and neon4 support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Fri, 16 Sep 2011 14:04:19 +0200 |
parents | 3b31bd44a09f |
children | fee54f1878f7 |
comparison
equal
deleted
inserted
replaced
3:3b31bd44a09f | 4:2d6c49fcafcb |
---|---|
17 | 17 |
18 struct peck_fftr_state{ | 18 struct peck_fftr_state{ |
19 peck_fft_cfg substate; | 19 peck_fft_cfg substate; |
20 peck_fft_cpx *tmpbuf; | 20 peck_fft_cpx *tmpbuf; |
21 peck_fft_cpx *super_twiddles; | 21 peck_fft_cpx *super_twiddles; |
22 #ifdef USE_SIMD | 22 #if USE_SIMD == SIMD_SSE2 |
23 void * pad; | 23 void *pad; |
24 #endif | 24 #endif |
25 }; | 25 }; |
26 | 26 |
27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) { | 27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) { |
28 int i; | 28 int i; |
32 if (nfft & 1) { | 32 if (nfft & 1) { |
33 fprintf(stderr, "Real FFT must be even.\n"); | 33 fprintf(stderr, "Real FFT must be even.\n"); |
34 return NULL; | 34 return NULL; |
35 } | 35 } |
36 nfft >>= 1; | 36 nfft >>= 1; |
37 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize); | |
37 | 38 |
38 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize); | |
39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2); | 39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2); |
40 | |
41 if (lenmem == NULL) { | 40 if (lenmem == NULL) { |
42 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded); | 41 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded); |
43 } else { | 42 } else { |
44 if (*lenmem >= memneeded) | 43 if (*lenmem >= memneeded) |
45 st = (peck_fftr_cfg) mem; | 44 st = (peck_fftr_cfg) mem; |
49 return NULL; | 48 return NULL; |
50 | 49 |
51 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */ | 50 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */ |
52 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize); | 51 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize); |
53 st->super_twiddles = st->tmpbuf + nfft; | 52 st->super_twiddles = st->tmpbuf + nfft; |
53 | |
54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize); | 54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize); |
55 | 55 |
56 for (i = 0; i < nfft/2; ++i) { | 56 for (i = 0; i < nfft/2; ++i) { |
57 float phase = | 57 float phase = |
58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f); | 58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f); |
59 if (inverse_fft) | 59 if (inverse_fft) |
60 phase *= -1; | 60 phase *= -1; |
61 kf_cexp(st->super_twiddles+i, phase); | 61 kf_cexp(st->super_twiddles+i, phase); |
62 } | 62 } |
63 | |
63 return st; | 64 return st; |
64 } | 65 } |
65 | 66 |
66 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) { | 67 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) { |
67 /* Input buffer timedata is stored row-wise */ | 68 /* Input buffer timedata is stored row-wise */ |
92 C_FIXDIV(tdc,2); | 93 C_FIXDIV(tdc,2); |
93 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); | 94 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); |
94 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); | 95 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); |
95 freqdata[0].r = tdc.r + tdc.i; | 96 freqdata[0].r = tdc.r + tdc.i; |
96 freqdata[ncfft].r = tdc.r - tdc.i; | 97 freqdata[ncfft].r = tdc.r - tdc.i; |
97 #ifdef USE_SIMD | 98 #if USE_SIMD == SIMD_SSE2 |
98 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); | 99 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); |
100 #elif USE_SIMD == SIMD_NEON4 | |
101 freqdata[ncfft].i = freqdata[0].i = vdupq_n_f32(0.0f); | |
102 #elif USE_SIMD == SIMD_NEON2 | |
103 freqdata[ncfft].i = freqdata[0].i = vdup_n_f32(0.0f); | |
99 #else | 104 #else |
100 freqdata[ncfft].i = freqdata[0].i = 0; | 105 freqdata[ncfft].i = freqdata[0].i = 0; |
101 #endif | 106 #endif |
102 | 107 |
103 for (k = 1; k <= ncfft/2; ++k) { | 108 for (k = 1; k <= ncfft/2; ++k) { |
136 for (k = 1; k <= ncfft / 2; ++k) { | 141 for (k = 1; k <= ncfft / 2; ++k) { |
137 peck_fft_cpx fk, fnkc, fek, fok, tmp; | 142 peck_fft_cpx fk, fnkc, fek, fok, tmp; |
138 fk = freqdata[k]; | 143 fk = freqdata[k]; |
139 fnkc.r = freqdata[ncfft - k].r; | 144 fnkc.r = freqdata[ncfft - k].r; |
140 fnkc.i = -freqdata[ncfft - k].i; | 145 fnkc.i = -freqdata[ncfft - k].i; |
141 C_FIXDIV(fk , 2); | 146 C_FIXDIV(fk, 2); |
142 C_FIXDIV(fnkc , 2); | 147 C_FIXDIV(fnkc, 2); |
143 | 148 |
144 C_ADD(fek, fk, fnkc); | 149 C_ADD(fek, fk, fnkc); |
145 C_SUB(tmp, fk, fnkc); | 150 C_SUB(tmp, fk, fnkc); |
146 C_MUL(fok, tmp, st->super_twiddles[k-1]); | 151 C_MUL(fok, tmp, st->super_twiddles[k-1]); |
147 C_ADD(st->tmpbuf[k], fek, fok); | 152 C_ADD(st->tmpbuf[k], fek, fok); |
148 C_SUB(st->tmpbuf[ncfft - k], fek, fok); | 153 C_SUB(st->tmpbuf[ncfft - k], fek, fok); |
149 #ifdef USE_SIMD | 154 #if USE_SIMD == SIMD_SSE2 |
150 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); | 155 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0f); |
156 #elif USE_SIMD == SIMD_NEON4 | |
157 st->tmpbuf[ncfft - k].i *= vdupq_n_f32(-1.0f); | |
158 #elif USE_SIMD == SIMD_NEON2 | |
159 st->tmpbuf[ncfft - k].i *= vdup_n_f32(-1.0f); | |
151 #else | 160 #else |
152 st->tmpbuf[ncfft - k].i *= -1; | 161 st->tmpbuf[ncfft - k].i *= -1; |
153 #endif | 162 #endif |
154 } | 163 } |
155 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata); | 164 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata); |