Mercurial > hg > pa-neon
diff svolume_neon.c @ 5:07763f536182 default tip
ALIGNment support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Sun, 08 Jul 2012 21:48:08 +0200 |
parents | 1f6289166006 |
children |
line wrap: on
line diff
--- a/svolume_neon.c Sun Jul 08 21:03:41 2012 +0200
+++ b/svolume_neon.c Sun Jul 08 21:48:08 2012 +0200
@@ -340,18 +340,19 @@
 #define TIMES 50000
 #define CHANNELS 4
 #define PADDING 16
+#define ALIGN 1
 
 static void run_test_float(void) {
-    float floats[SAMPLES];
-    float floats_ref[SAMPLES];
-    float floats_orig[SAMPLES];
+    float floats[SAMPLES+ALIGN];
+    float floats_ref[SAMPLES+ALIGN];
+    float floats_orig[SAMPLES+ALIGN];
     float volumes[CHANNELS];
     unsigned i;
     pa_usec_t start, stop;
 
     pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES);
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = 0; i < SAMPLES+ALIGN; i++) {
         floats_orig[i] = rand()/(float) RAND_MAX - 0.5f;
     }
     memcpy(floats_ref, floats_orig, sizeof(floats_orig));
@@ -360,10 +361,10 @@
     for (i = 0; i < CHANNELS; i++)
         volumes[i] = 0.5f * rand() / (float) RAND_MAX;
 
-    pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats));
-    pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref));
+    pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats));
+    pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref));
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
             pa_log_debug("%d: %.3f != %.3f (%.3f)",
                 i, floats[i], floats_ref[i], floats_orig[i]);
@@ -373,7 +374,7 @@
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(floats, floats_orig, sizeof(floats_orig));
-        pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats));
+        pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats));
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
@@ -381,16 +382,16 @@
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(floats_ref, floats_orig, sizeof(floats_orig));
-        pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref));
+        pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref));
     }
     stop = pa_rtclock_now();
     pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
 }
 
 static void run_test_s16(void) {
-    int16_t samples[SAMPLES];
-    int16_t samples_ref[SAMPLES];
-    int16_t samples_orig[SAMPLES];
+    int16_t samples[SAMPLES+ALIGN];
+    int16_t samples_ref[SAMPLES+ALIGN];
+    int16_t samples_orig[SAMPLES+ALIGN];
     uint32_t volumes[CHANNELS + PADDING];
     unsigned i, padding;
     pa_usec_t start, stop;
@@ -408,20 +409,20 @@
     for (padding = 0; padding < PADDING; padding++, i++)
         volumes[i] = volumes[padding];
 
-    pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples));
-    pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref));
+    pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples));
+    pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, sizeof(samples_ref));
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (abs(samples[i] - samples_ref[i]) > 0) {
             pa_log_debug("%d: %d != %d (%d)",
                 i, samples[i], samples_ref[i], samples_orig[i]);
         }
     }
-exit(0);
+
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(samples, samples_orig, sizeof(samples_orig));
-        pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples));
+        pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples));
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
@@ -429,7 +430,7 @@
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(samples, samples_orig, sizeof(samples_orig));
-        pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples));
+        pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, sizeof(samples));
     }
     stop = pa_rtclock_now();
     pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start));