comparison peck_fftr.c @ 4:2d6c49fcafcb

neon2 and neon4 support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Fri, 16 Sep 2011 14:04:19 +0200
parents 3b31bd44a09f
children fee54f1878f7
comparison
equal deleted inserted replaced
3:3b31bd44a09f 4:2d6c49fcafcb
17 17
18 struct peck_fftr_state{ 18 struct peck_fftr_state{
19 peck_fft_cfg substate; 19 peck_fft_cfg substate;
20 peck_fft_cpx *tmpbuf; 20 peck_fft_cpx *tmpbuf;
21 peck_fft_cpx *super_twiddles; 21 peck_fft_cpx *super_twiddles;
22 #ifdef USE_SIMD 22 #if USE_SIMD == SIMD_SSE2
23 void * pad; 23 void *pad;
24 #endif 24 #endif
25 }; 25 };
26 26
27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) { 27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) {
28 int i; 28 int i;
32 if (nfft & 1) { 32 if (nfft & 1) {
33 fprintf(stderr, "Real FFT must be even.\n"); 33 fprintf(stderr, "Real FFT must be even.\n");
34 return NULL; 34 return NULL;
35 } 35 }
36 nfft >>= 1; 36 nfft >>= 1;
37 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize);
37 38
38 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize);
39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2); 39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2);
40
41 if (lenmem == NULL) { 40 if (lenmem == NULL) {
42 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded); 41 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded);
43 } else { 42 } else {
44 if (*lenmem >= memneeded) 43 if (*lenmem >= memneeded)
45 st = (peck_fftr_cfg) mem; 44 st = (peck_fftr_cfg) mem;
49 return NULL; 48 return NULL;
50 49
51 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */ 50 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */
52 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize); 51 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize);
53 st->super_twiddles = st->tmpbuf + nfft; 52 st->super_twiddles = st->tmpbuf + nfft;
53
54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize); 54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
55 55
56 for (i = 0; i < nfft/2; ++i) { 56 for (i = 0; i < nfft/2; ++i) {
57 float phase = 57 float phase =
58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f); 58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f);
59 if (inverse_fft) 59 if (inverse_fft)
60 phase *= -1; 60 phase *= -1;
61 kf_cexp(st->super_twiddles+i, phase); 61 kf_cexp(st->super_twiddles+i, phase);
62 } 62 }
63
63 return st; 64 return st;
64 } 65 }
65 66
66 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) { 67 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) {
67 /* Input buffer timedata is stored row-wise */ 68 /* Input buffer timedata is stored row-wise */
92 C_FIXDIV(tdc,2); 93 C_FIXDIV(tdc,2);
93 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); 94 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
94 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); 95 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
95 freqdata[0].r = tdc.r + tdc.i; 96 freqdata[0].r = tdc.r + tdc.i;
96 freqdata[ncfft].r = tdc.r - tdc.i; 97 freqdata[ncfft].r = tdc.r - tdc.i;
97 #ifdef USE_SIMD 98 #if USE_SIMD == SIMD_SSE2
98 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); 99 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
100 #elif USE_SIMD == SIMD_NEON4
101 freqdata[ncfft].i = freqdata[0].i = vdupq_n_f32(0.0f);
102 #elif USE_SIMD == SIMD_NEON2
103 freqdata[ncfft].i = freqdata[0].i = vdup_n_f32(0.0f);
99 #else 104 #else
100 freqdata[ncfft].i = freqdata[0].i = 0; 105 freqdata[ncfft].i = freqdata[0].i = 0;
101 #endif 106 #endif
102 107
103 for (k = 1; k <= ncfft/2; ++k) { 108 for (k = 1; k <= ncfft/2; ++k) {
136 for (k = 1; k <= ncfft / 2; ++k) { 141 for (k = 1; k <= ncfft / 2; ++k) {
137 peck_fft_cpx fk, fnkc, fek, fok, tmp; 142 peck_fft_cpx fk, fnkc, fek, fok, tmp;
138 fk = freqdata[k]; 143 fk = freqdata[k];
139 fnkc.r = freqdata[ncfft - k].r; 144 fnkc.r = freqdata[ncfft - k].r;
140 fnkc.i = -freqdata[ncfft - k].i; 145 fnkc.i = -freqdata[ncfft - k].i;
141 C_FIXDIV(fk , 2); 146 C_FIXDIV(fk, 2);
142 C_FIXDIV(fnkc , 2); 147 C_FIXDIV(fnkc, 2);
143 148
144 C_ADD(fek, fk, fnkc); 149 C_ADD(fek, fk, fnkc);
145 C_SUB(tmp, fk, fnkc); 150 C_SUB(tmp, fk, fnkc);
146 C_MUL(fok, tmp, st->super_twiddles[k-1]); 151 C_MUL(fok, tmp, st->super_twiddles[k-1]);
147 C_ADD(st->tmpbuf[k], fek, fok); 152 C_ADD(st->tmpbuf[k], fek, fok);
148 C_SUB(st->tmpbuf[ncfft - k], fek, fok); 153 C_SUB(st->tmpbuf[ncfft - k], fek, fok);
149 #ifdef USE_SIMD 154 #if USE_SIMD == SIMD_SSE2
150 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); 155 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0f);
156 #elif USE_SIMD == SIMD_NEON4
157 st->tmpbuf[ncfft - k].i *= vdupq_n_f32(-1.0f);
158 #elif USE_SIMD == SIMD_NEON2
159 st->tmpbuf[ncfft - k].i *= vdup_n_f32(-1.0f);
151 #else 160 #else
152 st->tmpbuf[ncfft - k].i *= -1; 161 st->tmpbuf[ncfft - k].i *= -1;
153 #endif 162 #endif
154 } 163 }
155 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata); 164 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata);

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.