Mercurial > hg > peckfft
comparison peck_fftr.c @ 4:2d6c49fcafcb
neon2 and neon4 support
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Fri, 16 Sep 2011 14:04:19 +0200 |
| parents | 3b31bd44a09f |
| children | fee54f1878f7 |
comparison
equal
deleted
inserted
replaced
| 3:3b31bd44a09f | 4:2d6c49fcafcb |
|---|---|
| 17 | 17 |
| 18 struct peck_fftr_state{ | 18 struct peck_fftr_state{ |
| 19 peck_fft_cfg substate; | 19 peck_fft_cfg substate; |
| 20 peck_fft_cpx *tmpbuf; | 20 peck_fft_cpx *tmpbuf; |
| 21 peck_fft_cpx *super_twiddles; | 21 peck_fft_cpx *super_twiddles; |
| 22 #ifdef USE_SIMD | 22 #if USE_SIMD == SIMD_SSE2 |
| 23 void * pad; | 23 void *pad; |
| 24 #endif | 24 #endif |
| 25 }; | 25 }; |
| 26 | 26 |
| 27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) { | 27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) { |
| 28 int i; | 28 int i; |
| 32 if (nfft & 1) { | 32 if (nfft & 1) { |
| 33 fprintf(stderr, "Real FFT must be even.\n"); | 33 fprintf(stderr, "Real FFT must be even.\n"); |
| 34 return NULL; | 34 return NULL; |
| 35 } | 35 } |
| 36 nfft >>= 1; | 36 nfft >>= 1; |
| 37 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize); | |
| 37 | 38 |
| 38 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize); | |
| 39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2); | 39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2); |
| 40 | |
| 41 if (lenmem == NULL) { | 40 if (lenmem == NULL) { |
| 42 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded); | 41 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded); |
| 43 } else { | 42 } else { |
| 44 if (*lenmem >= memneeded) | 43 if (*lenmem >= memneeded) |
| 45 st = (peck_fftr_cfg) mem; | 44 st = (peck_fftr_cfg) mem; |
| 49 return NULL; | 48 return NULL; |
| 50 | 49 |
| 51 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */ | 50 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */ |
| 52 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize); | 51 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize); |
| 53 st->super_twiddles = st->tmpbuf + nfft; | 52 st->super_twiddles = st->tmpbuf + nfft; |
| 53 | |
| 54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize); | 54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize); |
| 55 | 55 |
| 56 for (i = 0; i < nfft/2; ++i) { | 56 for (i = 0; i < nfft/2; ++i) { |
| 57 float phase = | 57 float phase = |
| 58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f); | 58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f); |
| 59 if (inverse_fft) | 59 if (inverse_fft) |
| 60 phase *= -1; | 60 phase *= -1; |
| 61 kf_cexp(st->super_twiddles+i, phase); | 61 kf_cexp(st->super_twiddles+i, phase); |
| 62 } | 62 } |
| 63 | |
| 63 return st; | 64 return st; |
| 64 } | 65 } |
| 65 | 66 |
| 66 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) { | 67 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) { |
| 67 /* Input buffer timedata is stored row-wise */ | 68 /* Input buffer timedata is stored row-wise */ |
| 92 C_FIXDIV(tdc,2); | 93 C_FIXDIV(tdc,2); |
| 93 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); | 94 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); |
| 94 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); | 95 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); |
| 95 freqdata[0].r = tdc.r + tdc.i; | 96 freqdata[0].r = tdc.r + tdc.i; |
| 96 freqdata[ncfft].r = tdc.r - tdc.i; | 97 freqdata[ncfft].r = tdc.r - tdc.i; |
| 97 #ifdef USE_SIMD | 98 #if USE_SIMD == SIMD_SSE2 |
| 98 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); | 99 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); |
| 100 #elif USE_SIMD == SIMD_NEON4 | |
| 101 freqdata[ncfft].i = freqdata[0].i = vdupq_n_f32(0.0f); | |
| 102 #elif USE_SIMD == SIMD_NEON2 | |
| 103 freqdata[ncfft].i = freqdata[0].i = vdup_n_f32(0.0f); | |
| 99 #else | 104 #else |
| 100 freqdata[ncfft].i = freqdata[0].i = 0; | 105 freqdata[ncfft].i = freqdata[0].i = 0; |
| 101 #endif | 106 #endif |
| 102 | 107 |
| 103 for (k = 1; k <= ncfft/2; ++k) { | 108 for (k = 1; k <= ncfft/2; ++k) { |
| 136 for (k = 1; k <= ncfft / 2; ++k) { | 141 for (k = 1; k <= ncfft / 2; ++k) { |
| 137 peck_fft_cpx fk, fnkc, fek, fok, tmp; | 142 peck_fft_cpx fk, fnkc, fek, fok, tmp; |
| 138 fk = freqdata[k]; | 143 fk = freqdata[k]; |
| 139 fnkc.r = freqdata[ncfft - k].r; | 144 fnkc.r = freqdata[ncfft - k].r; |
| 140 fnkc.i = -freqdata[ncfft - k].i; | 145 fnkc.i = -freqdata[ncfft - k].i; |
| 141 C_FIXDIV(fk , 2); | 146 C_FIXDIV(fk, 2); |
| 142 C_FIXDIV(fnkc , 2); | 147 C_FIXDIV(fnkc, 2); |
| 143 | 148 |
| 144 C_ADD(fek, fk, fnkc); | 149 C_ADD(fek, fk, fnkc); |
| 145 C_SUB(tmp, fk, fnkc); | 150 C_SUB(tmp, fk, fnkc); |
| 146 C_MUL(fok, tmp, st->super_twiddles[k-1]); | 151 C_MUL(fok, tmp, st->super_twiddles[k-1]); |
| 147 C_ADD(st->tmpbuf[k], fek, fok); | 152 C_ADD(st->tmpbuf[k], fek, fok); |
| 148 C_SUB(st->tmpbuf[ncfft - k], fek, fok); | 153 C_SUB(st->tmpbuf[ncfft - k], fek, fok); |
| 149 #ifdef USE_SIMD | 154 #if USE_SIMD == SIMD_SSE2 |
| 150 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); | 155 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0f); |
| 156 #elif USE_SIMD == SIMD_NEON4 | |
| 157 st->tmpbuf[ncfft - k].i *= vdupq_n_f32(-1.0f); | |
| 158 #elif USE_SIMD == SIMD_NEON2 | |
| 159 st->tmpbuf[ncfft - k].i *= vdup_n_f32(-1.0f); | |
| 151 #else | 160 #else |
| 152 st->tmpbuf[ncfft - k].i *= -1; | 161 st->tmpbuf[ncfft - k].i *= -1; |
| 153 #endif | 162 #endif |
| 154 } | 163 } |
| 155 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata); | 164 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata); |
