Mercurial > hg > peckfft
changeset 9:8726585681f6
backup
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Wed, 21 Sep 2011 12:18:40 +0200 |
parents | f2d3b39267ee |
children | 05f6ab0a17c0 |
files | compile.sh kf_bfly2.S peck_fft.c |
diffstat | 3 files changed, 90 insertions(+), 24 deletions(-) [+] |
line wrap: on
line diff
--- a/compile.sh Mon Sep 19 19:49:56 2011 +0200 +++ b/compile.sh Wed Sep 21 12:18:40 2011 +0200 @@ -1,36 +1,52 @@ -gcc \ - -O2 -march=native -msse2 -mfpmath=sse -ffast-math -fomit-frame-pointer \ - -DUSE_SIMD=SIMD_SSE2 \ +/opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \ + -O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \ + -DUSE_SIMD=SIMD_NEON2 -DBFLY2_ASM=1 \ -I . \ - -o peck_test_x86 \ - peck_fftr.c peck_fft.c \ - peck_test.c \ + -c -o kf_bfly2_only.o -g \ + kf_bfly2.S \ -lm /opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \ -O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \ - -DUSE_SIMD=SIMD_NEON \ - -I . \ - -o peck_test_neon \ + -DUSE_SIMD=SIMD_NEON2 -DBFLY2_ASM=1 \ + -I . -I ../armv7_cycles \ + -o peck_test_arm -g \ + peck_fft.c peck_fftr.c peck_test.c kf_bfly2.S ../armv7_cycles/armv7_cycles.c \ + -lm + +gcc \ + -O2 -march=native -msse2 -mfpmath=sse -ffast-math -fomit-frame-pointer \ + -DUSE_SIMD=SIMD_SSE2 -DBFLY2_ASM=0 \ + -I . -I ../armv7_cycles \ + -o peck_test_x86 \ peck_fftr.c peck_fft.c \ - peck_test.c \ + peck_test.c ../armv7_cycles/armv7_cycles.c \ -lm /opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \ -O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \ - -DUSE_SIMD=SIMD_NEON2 \ - -I . \ - -o peck_test_neon2 \ + -DUSE_SIMD=SIMD_NEON -DBFLY2_ASM=0 \ + -I . -I ../armv7_cycles \ + -o peck_test_neon \ peck_fftr.c peck_fft.c \ - peck_test.c \ + peck_test.c ../armv7_cycles/armv7_cycles.c \ -lm /opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \ -O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \ - -DUSE_SIMD=SIMD_NEON4 \ - -I . \ + -DUSE_SIMD=SIMD_NEON2 -DBFLY2_ASM=0 \ + -I . 
-I ../armv7_cycles \ + -o peck_test_neon2 \ + peck_fftr.c peck_fft.c \ + peck_test.c ../armv7_cycles/armv7_cycles.c \ + -lm + +/opt/arm-2011.03/bin/arm-none-linux-gnueabi-gcc \ + -O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -fomit-frame-pointer \ + -DUSE_SIMD=SIMD_NEON4 -DBFLY2_ASM=0 \ + -I . -I ../armv7_cycles \ -o peck_test_neon4 \ peck_fftr.c peck_fft.c \ - peck_test.c \ + peck_test.c ../armv7_cycles/armv7_cycles.c \ -lm
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kf_bfly2.S Wed Sep 21 12:18:40 2011 +0200 @@ -0,0 +1,48 @@ + .cpu cortex-a8 + .eabi_attribute 27, 3 + .fpu neon + .eabi_attribute 23, 1 + .eabi_attribute 24, 1 + .eabi_attribute 25, 1 + .eabi_attribute 26, 2 + .eabi_attribute 30, 1 + .eabi_attribute 18, 4 + + .text + .align 2 + .global kf_bfly2 + .type kf_bfly2, %function +kf_bfly2: + .fnstart + .cfi_startproc + add r2, r2, #264 + mov r1, r1, asl #4 + sub r1, r1, #16 + add ip, r0, r3, asl #4 +.for: + + vld1.32 {d24,d25}, [ip] + vld1.32 {d20,d21}, [r2]! + + vmul.f32 d17, d24, d20 + vmul.f32 d18, d20, d25 + vld1.32 {d19,d20}, [r0] + vmls.f32 d17, d25, d21 + vmla.f32 d18, d24, d21 + + add r2, r2, r1 + + vsub.f32 d24, d19, d17 + vsub.f32 d25, d20, d18 + vst1.32 {d24,d25}, [ip]! + + vadd.f32 d17, d19, d17 + vadd.f32 d18, d20, d18 + vst1.32 {d17,d18}, [r0]! + + subs r3, r3, #1 + bne .for + bx lr + .cfi_endproc + .fnend + .size kf_bfly2, .-kf_bfly2
--- a/peck_fft.c Mon Sep 19 19:49:56 2011 +0200 +++ b/peck_fft.c Wed Sep 21 12:18:40 2011 +0200 @@ -12,12 +12,12 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - +#include "armv7_cycles.h" #include "_peck_fft_guts.h" /* The guts header contains all the multiplication and addition macros that are defined for * fixed or floating point complex numbers. It also declares the kf_ internal functions. */ - +#if !BFLY2_ASM static void kf_bfly2( peck_fft_cpx *Fout, const size_t fstride, @@ -26,11 +26,10 @@ // printf("kf_bfly2, %d\n", fstride); - peck_fft_cpx *Fout2; - peck_fft_cpx *tw1 = st->twiddles; - peck_fft_cpx t; - Fout2 = Fout + m; + peck_fft_cpx * __restrict tw1 = st->twiddles; + peck_fft_cpx * __restrict Fout2 = Fout + m; do { + peck_fft_cpx t; C_MUL(t, *Fout2, *tw1); tw1 += fstride; C_SUB(*Fout2, *Fout, t); @@ -39,6 +38,7 @@ ++Fout; } while (--m); } +#endif static void kf_bfly4( peck_fft_cpx * Fout, @@ -264,7 +264,9 @@ // recombine the p smaller DFTs switch (p) { - case 2: kf_bfly2(Fout, fstride, st, m); break; + case 2: + kf_bfly2(Fout, fstride, st, m); + break; case 3: kf_bfly3(Fout, fstride, st, m); break; case 4: kf_bfly4(Fout, fstride, st, m); break; case 5: kf_bfly5(Fout, fstride, st, m); break;