changeset 9:8726585681f6

backup
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Wed, 21 Sep 2011 12:18:40 +0200
parents f2d3b39267ee
children 05f6ab0a17c0
files compile.sh kf_bfly2.S peck_fft.c
diffstat 3 files changed, 90 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/compile.sh	Mon Sep 19 19:49:56 2011 +0200
+++ b/compile.sh	Wed Sep 21 12:18:40 2011 +0200
@@ -1,36 +1,52 @@
-gcc \
-    -O2 -march=native -msse2 -mfpmath=sse -ffast-math -fomit-frame-pointer \
-    -DUSE_SIMD=SIMD_SSE2 \
+/opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \
+	-O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \
+    -DUSE_SIMD=SIMD_NEON2 -DBFLY2_ASM=1 \
     -I . \
-    -o peck_test_x86 \
-    peck_fftr.c peck_fft.c \
-    peck_test.c \
+    -c -o kf_bfly2_only.o -g \
+    kf_bfly2.S \
     -lm 
 
 /opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \
 	-O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \
-    -DUSE_SIMD=SIMD_NEON \
-    -I . \
-    -o peck_test_neon \
+    -DUSE_SIMD=SIMD_NEON2 -DBFLY2_ASM=1 \
+    -I . -I ../armv7_cycles \
+    -o peck_test_arm -g \
+    peck_fft.c peck_fftr.c peck_test.c kf_bfly2.S ../armv7_cycles/armv7_cycles.c \
+    -lm 
+
+gcc \
+    -O2 -march=native -msse2 -mfpmath=sse -ffast-math -fomit-frame-pointer \
+    -DUSE_SIMD=SIMD_SSE2 -DBFLY2_ASM=0 \
+    -I . -I ../armv7_cycles \
+    -o peck_test_x86 \
     peck_fftr.c peck_fft.c \
-    peck_test.c \
+    peck_test.c ../armv7_cycles/armv7_cycles.c \
     -lm 
 
 /opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \
 	-O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \
-    -DUSE_SIMD=SIMD_NEON2 \
-    -I . \
-    -o peck_test_neon2 \
+    -DUSE_SIMD=SIMD_NEON -DBFLY2_ASM=0 \
+    -I . -I ../armv7_cycles \
+    -o peck_test_neon \
     peck_fftr.c peck_fft.c \
-    peck_test.c \
+    peck_test.c ../armv7_cycles/armv7_cycles.c \
     -lm 
 
 /opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \
 	-O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \
-    -DUSE_SIMD=SIMD_NEON4 \
-    -I . \
+    -DUSE_SIMD=SIMD_NEON2 -DBFLY2_ASM=0 \
+    -I . -I ../armv7_cycles \
+    -o peck_test_neon2 \
+    peck_fftr.c peck_fft.c \
+    peck_test.c ../armv7_cycles/armv7_cycles.c \
+    -lm 
+
+/opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \
+	-O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \
+    -DUSE_SIMD=SIMD_NEON4 -DBFLY2_ASM=0 \
+    -I . -I ../armv7_cycles \
     -o peck_test_neon4 \
     peck_fftr.c peck_fft.c \
-    peck_test.c \
+    peck_test.c ../armv7_cycles/armv7_cycles.c \
     -lm 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kf_bfly2.S	Wed Sep 21 12:18:40 2011 +0200
@@ -0,0 +1,48 @@
+	.cpu cortex-a8
+	.eabi_attribute 27, 3
+	.fpu neon
+	.eabi_attribute 23, 1
+	.eabi_attribute 24, 1
+	.eabi_attribute 25, 1
+	.eabi_attribute 26, 2
+	.eabi_attribute 30, 1
+	.eabi_attribute 18, 4
+
+	.text
+	.align	2
+	.global kf_bfly2
+	.type	kf_bfly2, %function
+kf_bfly2:	/* NEON radix-2 butterfly loop; replaces the C kf_bfly2() when BFLY2_ASM is set. Presumably r0=Fout, r1=fstride, r2=st, r3=m (argument order of the C call, AAPCS) -- TODO confirm */
+	.fnstart
+	.cfi_startproc
+	add	r2, r2, #264		/* r2 += 264; presumably the byte offset of st->twiddles (C version starts at st->twiddles) -- TODO confirm against the struct layout */
+	mov	r1, r1, asl #4	/* r1 = r1 * 16: scale the stride to bytes, assuming 16-byte elements */
+	sub r1, r1, #16		/* minus the 16 bytes already added by the post-incrementing load from [r2] in the loop */
+	add	ip, r0, r3, asl #4	/* ip = r0 + r3*16; mirrors Fout2 = Fout + m in the C version */
+.for:	/* loop head; the body runs before the counter is tested, like the do/while in the C version */
+
+	vld1.32	{d24,d25}, [ip]	/* load 16 bytes at ip (second butterfly input) into d24,d25 */
+	vld1.32	{d20,d21}, [r2]!	/* load 16 bytes at r2 (twiddle) into d20,d21; r2 is post-incremented by 16 */
+
+	vmul.f32	d17, d24, d20	/* d17 = d24 * d20 */
+	vmul.f32	d18, d20, d25	/* d18 = d20 * d25 (last read of the twiddle value in d20) */
+    vld1.32    {d19,d20}, [r0]	/* load 16 bytes at r0 (first butterfly input) into d19,d20; d20 is reused, so this must stay after the vmul above */
+	vmls.f32	d17, d25, d21	/* d17 -= d25 * d21 */
+	vmla.f32	d18, d24, d21	/* d18 += d24 * d21; d17/d18 now look like the complex product (C_MUL in the C version), assuming d24/d20 hold real and d25/d21 imaginary parts -- TODO confirm layout */
+
+	add	r2, r2, r1	/* advance the twiddle pointer by the remaining stride (16 bytes were already added by the load) */
+
+	vsub.f32	d24, d19, d17	/* difference, first half */
+	vsub.f32	d25, d20, d18	/* difference, second half */
+	vst1.32 {d24,d25}, [ip]!	/* store the difference back at ip and post-increment ip by 16 */
+
+	vadd.f32	d17, d19, d17	/* sum, first half */
+	vadd.f32	d18, d20, d18	/* sum, second half */
+	vst1.32 {d17,d18}, [r0]!	/* store the sum back at r0 and post-increment r0 by 16 */
+	
+	subs	r3, r3, #1	/* decrement the loop counter and set the flags */
+	bne	.for	/* repeat until the counter reaches zero; a counter of 0 on entry would wrap instead of skipping the loop */
+	bx	lr	/* return to the caller */
+	.cfi_endproc
+	.fnend
+	.size	kf_bfly2, .-kf_bfly2
--- a/peck_fft.c	Mon Sep 19 19:49:56 2011 +0200
+++ b/peck_fft.c	Wed Sep 21 12:18:40 2011 +0200
@@ -12,12 +12,12 @@
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
-
+#include "armv7_cycles.h"
 #include "_peck_fft_guts.h"
 /* The guts header contains all the multiplication and addition macros that are defined for
  * fixed or floating point complex numbers. It also declares the kf_ internal functions.
  */
-
+#if !BFLY2_ASM
 static void kf_bfly2(
     peck_fft_cpx *Fout,
     const size_t fstride,
@@ -26,11 +26,10 @@
 
 // printf("kf_bfly2, %d\n", fstride);
 
-    peck_fft_cpx *Fout2;
-    peck_fft_cpx *tw1 = st->twiddles;
-    peck_fft_cpx t;
-    Fout2 = Fout + m;
+    peck_fft_cpx * __restrict tw1 = st->twiddles;
+    peck_fft_cpx * __restrict Fout2 = Fout + m;
     do {
+        peck_fft_cpx t;
         C_MUL(t, *Fout2, *tw1);
         tw1 += fstride;
         C_SUB(*Fout2, *Fout, t);
@@ -39,6 +38,7 @@
         ++Fout;
     } while (--m);
 }
+#endif
 
 static void kf_bfly4(
     peck_fft_cpx * Fout,
@@ -264,7 +264,9 @@
 
     // recombine the p smaller DFTs 
     switch (p) {
-        case 2: kf_bfly2(Fout, fstride, st, m); break;
+        case 2: 
+            kf_bfly2(Fout, fstride, st, m); 
+            break;
         case 3: kf_bfly3(Fout, fstride, st, m); break; 
         case 4: kf_bfly4(Fout, fstride, st, m); break;
         case 5: kf_bfly5(Fout, fstride, st, m); break; 

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.