Mercurial > hg > pa-neon
diff sconv_neon.c @ 5:07763f536182 default tip
ALIGNment support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Sun, 08 Jul 2012 21:48:08 +0200 |
parents | e889fd0e7769 |
children |
line wrap: on
line diff
--- a/sconv_neon.c Sun Jul 08 21:03:41 2012 +0200 +++ b/sconv_neon.c Sun Jul 08 21:48:08 2012 +0200 @@ -109,7 +109,7 @@ "1:\n\t" "vld1.16 {d0}, [%[src]]!\n\t" "vmovl.s16 q0, d0\n\t" - + "vcvt.f32.s32 q0, q0\n\t" "vmul.f32 q0, q0, q1\n\t" @@ -130,11 +130,12 @@ #define SAMPLES 1019 #define TIMES 100000 +#define ALIGN 1 static void run_test_from(void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - float floats[SAMPLES]; + int16_t samples[SAMPLES+ALIGN]; + int16_t samples_ref[SAMPLES+ALIGN]; + float floats[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; @@ -143,14 +144,14 @@ memset(samples_ref, 0, sizeof(samples_ref)); memset(samples, 0, sizeof(samples)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); } - pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); - pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); + pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); + pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(samples[i] - samples_ref[i]) > 0) { pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], floats[i]); @@ -159,41 +160,39 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); + pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); } stop = pa_rtclock_now(); pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); + pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); } stop = pa_rtclock_now(); pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); } static void run_test_to(void) { - int16_t samples[SAMPLES]; - float floats[SAMPLES]; - float floats_ref[SAMPLES]; + int16_t samples[SAMPLES+ALIGN]; + float floats[SAMPLES+ALIGN]; + float floats_ref[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; - pa_convert_func_t func; pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); memset(floats_ref, 0, sizeof(floats_ref)); memset(floats, 0, sizeof(float)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { samples[i] = rand() - RAND_MAX/2; } - func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; - func(SAMPLES, samples, floats_ref); - pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); + pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); + pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], samples[i]); @@ -202,14 +201,14 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); + pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); } stop = pa_rtclock_now(); pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - func(SAMPLES, samples, floats_ref); + pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); } stop = pa_rtclock_now(); pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));