diff peck_fftr.c @ 4:2d6c49fcafcb

neon2 and neon4 support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Fri, 16 Sep 2011 14:04:19 +0200
parents 3b31bd44a09f
children fee54f1878f7
line wrap: on
line diff
--- a/peck_fftr.c	Fri Sep 16 13:08:20 2011 +0200
+++ b/peck_fftr.c	Fri Sep 16 14:04:19 2011 +0200
@@ -19,8 +19,8 @@
     peck_fft_cfg substate;
     peck_fft_cpx *tmpbuf;
     peck_fft_cpx *super_twiddles;
-#ifdef USE_SIMD    
-    void * pad;
+#if USE_SIMD == SIMD_SSE2
+    void *pad;
 #endif    
 };
 
@@ -34,10 +34,9 @@
         return NULL;
     }
     nfft >>= 1;
+    peck_fft_alloc(nfft, inverse_fft, NULL, &subsize);
 
-    peck_fft_alloc(nfft, inverse_fft, NULL, &subsize);
     memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2);
-
     if (lenmem == NULL) {
         st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded);
     } else {
@@ -51,6 +50,7 @@
     st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */
     st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize);
     st->super_twiddles = st->tmpbuf + nfft;
+
     peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
 
     for (i = 0; i < nfft/2; ++i) {
@@ -60,6 +60,7 @@
             phase *= -1;
         kf_cexp(st->super_twiddles+i, phase);
     }
+
     return st;
 }
 
@@ -94,8 +95,12 @@
     CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
     freqdata[0].r = tdc.r + tdc.i;
     freqdata[ncfft].r = tdc.r - tdc.i;
-#ifdef USE_SIMD    
+#if USE_SIMD == SIMD_SSE2
     freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#elif USE_SIMD == SIMD_NEON4
+    freqdata[ncfft].i = freqdata[0].i = vdupq_n_f32(0.0f);
+#elif USE_SIMD == SIMD_NEON2
+    freqdata[ncfft].i = freqdata[0].i = vdup_n_f32(0.0f);
 #else
     freqdata[ncfft].i = freqdata[0].i = 0;
 #endif
@@ -138,16 +143,20 @@
         fk = freqdata[k];
         fnkc.r = freqdata[ncfft - k].r;
         fnkc.i = -freqdata[ncfft - k].i;
-        C_FIXDIV(fk , 2);
-        C_FIXDIV(fnkc , 2);
+        C_FIXDIV(fk, 2);
+        C_FIXDIV(fnkc, 2);
 
         C_ADD(fek, fk, fnkc);
         C_SUB(tmp, fk, fnkc);
         C_MUL(fok, tmp, st->super_twiddles[k-1]);
         C_ADD(st->tmpbuf[k],     fek, fok);
         C_SUB(st->tmpbuf[ncfft - k], fek, fok);
-#ifdef USE_SIMD        
-        st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#if USE_SIMD == SIMD_SSE2
+        st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0f);
+#elif USE_SIMD == SIMD_NEON4
+        st->tmpbuf[ncfft - k].i *= vdupq_n_f32(-1.0f);
+#elif USE_SIMD == SIMD_NEON2
+        st->tmpbuf[ncfft - k].i *= vdup_n_f32(-1.0f);
 #else
         st->tmpbuf[ncfft - k].i *= -1;
 #endif

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.