Mercurial > hg > peckfft
diff peck_fftr.c @ 4:2d6c49fcafcb
neon2 and neon4 support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Fri, 16 Sep 2011 14:04:19 +0200 |
parents | 3b31bd44a09f |
children | fee54f1878f7 |
line wrap: on
line diff
--- a/peck_fftr.c Fri Sep 16 13:08:20 2011 +0200 +++ b/peck_fftr.c Fri Sep 16 14:04:19 2011 +0200 @@ -19,8 +19,8 @@ peck_fft_cfg substate; peck_fft_cpx *tmpbuf; peck_fft_cpx *super_twiddles; -#ifdef USE_SIMD - void * pad; +#if USE_SIMD == SIMD_SSE2 + void *pad; #endif }; @@ -34,10 +34,9 @@ return NULL; } nfft >>= 1; + peck_fft_alloc(nfft, inverse_fft, NULL, &subsize); - peck_fft_alloc(nfft, inverse_fft, NULL, &subsize); memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2); - if (lenmem == NULL) { st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded); } else { @@ -51,6 +50,7 @@ st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */ st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize); st->super_twiddles = st->tmpbuf + nfft; + peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize); for (i = 0; i < nfft/2; ++i) { @@ -60,6 +60,7 @@ phase *= -1; kf_cexp(st->super_twiddles+i, phase); } + return st; } @@ -94,8 +95,12 @@ CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); freqdata[0].r = tdc.r + tdc.i; freqdata[ncfft].r = tdc.r - tdc.i; -#ifdef USE_SIMD +#if USE_SIMD == SIMD_SSE2 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); +#elif USE_SIMD == SIMD_NEON4 + freqdata[ncfft].i = freqdata[0].i = vdupq_n_f32(0.0f); +#elif USE_SIMD == SIMD_NEON2 + freqdata[ncfft].i = freqdata[0].i = vdup_n_f32(0.0f); #else freqdata[ncfft].i = freqdata[0].i = 0; #endif @@ -138,16 +143,20 @@ fk = freqdata[k]; fnkc.r = freqdata[ncfft - k].r; fnkc.i = -freqdata[ncfft - k].i; - C_FIXDIV(fk , 2); - C_FIXDIV(fnkc , 2); + C_FIXDIV(fk, 2); + C_FIXDIV(fnkc, 2); C_ADD(fek, fk, fnkc); C_SUB(tmp, fk, fnkc); C_MUL(fok, tmp, st->super_twiddles[k-1]); C_ADD(st->tmpbuf[k], fek, fok); C_SUB(st->tmpbuf[ncfft - k], fek, fok); -#ifdef USE_SIMD - st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); +#if USE_SIMD == SIMD_SSE2 + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0f); +#elif USE_SIMD == SIMD_NEON4 + st->tmpbuf[ncfft - k].i *= vdupq_n_f32(-1.0f); +#elif USE_SIMD == SIMD_NEON2 + st->tmpbuf[ncfft - k].i *= vdup_n_f32(-1.0f); #else st->tmpbuf[ncfft - k].i *= -1; #endif