diff peck_fft.c @ 10:05f6ab0a17c0

backup
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Wed, 21 Sep 2011 15:20:58 +0200
parents 8726585681f6
children abdcde012978
line wrap: on
line diff
--- a/peck_fft.c	Wed Sep 21 12:18:40 2011 +0200
+++ b/peck_fft.c	Wed Sep 21 15:20:58 2011 +0200
@@ -19,7 +19,7 @@
  */
 #if !BFLY2_ASM
 static void kf_bfly2(
-    peck_fft_cpx *Fout,
+    peck_fft_cpx * __restrict Fout,
     const size_t fstride,
     const peck_fft_cfg st,
     int m) {
@@ -40,51 +40,77 @@
 }
 #endif
 
+#if !BFLY4_ASM /* build this C kf_bfly4 only when BFLY4_ASM is 0 or undefined */
 static void kf_bfly4(
-    peck_fft_cpx * Fout,
+    peck_fft_cpx * __restrict Fout, /* read and updated in place at offsets 0, m, 2*m and 3*m */
     const size_t fstride,
     const peck_fft_cfg st,
     const size_t m) {
 
-    peck_fft_cpx *tw1,*tw2,*tw3;
-    peck_fft_cpx scratch[6];
-    size_t k=m;
-    const size_t m2=2*m;
-    const size_t m3=3*m;
+    peck_fft_cpx scratch[4]; /* temporaries; slot 3 is overwritten and reused several times per iteration */
+    peck_fft_cpx * __restrict tw1, * __restrict tw2, * __restrict tw3; /* read cursors into st->twiddles */
+    size_t k = m; /* iteration count; NOTE(review): the do/while loops assume m >= 1, m == 0 would wrap -- confirm callers */
+    const size_t m2 = 2*m; /* index of the third of the four elements combined per iteration */
+    const size_t m3 = 3*m; /* index of the fourth of the four elements combined per iteration */
 
 // printf("kf_bfly4, %d\n", fstride);
 
     tw3 = tw2 = tw1 = st->twiddles;
 
-    do {
-        C_MUL(scratch[0], Fout[m], *tw1);
-        C_MUL(scratch[1], Fout[m2], *tw2);
-        C_MUL(scratch[2], Fout[m3], *tw3);
+    if (st->inverse) { /* direction is tested once here, so neither loop branches on it per element */
+        do {
+            C_MUL(scratch[0], Fout[m], *tw1); /* C_MUL is defined elsewhere; presumably scratch[0] = Fout[m] * (*tw1) */
+            C_MUL(scratch[3], Fout[m2], *tw2); /* slot 3 first holds the term built from Fout[m2] */
+            C_MUL(scratch[2], Fout[m3], *tw3);
 
-        C_SUB(scratch[5], *Fout, scratch[1]);
-        C_ADDTO(*Fout, scratch[1]);
-        C_ADD(scratch[3], scratch[0], scratch[2]);
-        C_SUB(scratch[4], scratch[0], scratch[2]);
-        C_SUB(Fout[m2], *Fout, scratch[3]);
-        tw1 += fstride;
-        tw2 += fstride*2;
-        tw3 += fstride*3;
-        C_ADDTO(*Fout, scratch[3]);
+            C_SUB(scratch[1], *Fout, scratch[3]); /* scratch[1] is read again for Fout[m] / Fout[m3] below, which frees slot 3 for reuse */
+            C_ADDTO(*Fout, scratch[3]);
+            
+            C_ADD(scratch[3], scratch[0], scratch[2]); /* slot 3 reused: now combines scratch[0] and scratch[2] */
+            C_SUB(Fout[m2], *Fout, scratch[3]); /* must run before the C_ADDTO below changes *Fout */
+            C_ADDTO(*Fout, scratch[3]);
+
+            tw1 += fstride; /* the three cursors advance by 1x, 2x and 3x fstride per iteration */
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+
+            C_SUB(scratch[3], scratch[0], scratch[2]); /* slot 3 reused again, from scratch[0] and scratch[2] */
+            Fout[m].r = scratch[1].r - scratch[3].i; /* inverse direction: signs here mirror those in the else-branch loop */
+            Fout[m].i = scratch[1].i + scratch[3].r;
+            Fout[m3].r = scratch[1].r + scratch[3].i;
+            Fout[m3].i = scratch[1].i - scratch[3].r;
 
-        if (st->inverse) {
-            Fout[m].r = scratch[5].r - scratch[4].i;
-            Fout[m].i = scratch[5].i + scratch[4].r;
-            Fout[m3].r = scratch[5].r + scratch[4].i;
-            Fout[m3].i = scratch[5].i - scratch[4].r;
-        } else {
-            Fout[m].r = scratch[5].r + scratch[4].i;
-            Fout[m].i = scratch[5].i - scratch[4].r;
-            Fout[m3].r = scratch[5].r - scratch[4].i;
-            Fout[m3].i = scratch[5].i + scratch[4].r;
-        }
-        ++Fout;
-    } while (--k);
+            ++Fout; /* move on to the next group of four elements */
+        } while (--k);
+    }
+    else { /* st->inverse is zero: same loop body, only the signs of the last four assignments differ */
+        do {
+            C_MUL(scratch[0], Fout[m], *tw1);
+            C_MUL(scratch[3], Fout[m2], *tw2); /* slot 3 first holds the term built from Fout[m2] */
+            C_MUL(scratch[2], Fout[m3], *tw3);
+
+            C_SUB(scratch[1], *Fout, scratch[3]); /* scratch[1] is read again for Fout[m] / Fout[m3] below */
+            C_ADDTO(*Fout, scratch[3]);
+            
+            C_ADD(scratch[3], scratch[0], scratch[2]); /* slot 3 reused: now combines scratch[0] and scratch[2] */
+            C_SUB(Fout[m2], *Fout, scratch[3]); /* must run before the C_ADDTO below changes *Fout */
+            C_ADDTO(*Fout, scratch[3]);
+
+            tw1 += fstride; /* the three cursors advance by 1x, 2x and 3x fstride per iteration */
+            tw2 += fstride*2;
+            tw3 += fstride*3;
+
+            C_SUB(scratch[3], scratch[0], scratch[2]); /* slot 3 reused again, from scratch[0] and scratch[2] */
+            Fout[m].r = scratch[1].r + scratch[3].i; /* signs here are the opposite of the st->inverse loop */
+            Fout[m].i = scratch[1].i - scratch[3].r;
+            Fout[m3].r = scratch[1].r - scratch[3].i;
+            Fout[m3].i = scratch[1].i + scratch[3].r;
+
+            ++Fout; /* move on to the next group of four elements */
+        } while (--k);
+    }
 }
+#endif /* !BFLY4_ASM */
 
 static void kf_bfly3(
      peck_fft_cpx * Fout,

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.