# HG changeset patch # User Peter Meerwald # Date 1341776888 -7200 # Node ID 07763f53618212244b45358e7bb2736f3be23ac2 # Parent 1f6289166006786b83c76ae4937aacb58ba7edd4 ALIGNment support diff -r 1f6289166006 -r 07763f536182 remap_neon.c --- a/remap_neon.c Sun Jul 08 21:03:41 2012 +0200 +++ b/remap_neon.c Sun Jul 08 21:48:08 2012 +0200 @@ -239,7 +239,7 @@ static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) { int i = n & 3; - + asm volatile ( "mov %[n], %[n], lsr #2\n\t" "1:\n\t" @@ -288,7 +288,7 @@ static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) { int i = n & 7; - + asm volatile ( "mov %[n], %[n], lsr #3\n\t" "1:\n\t" @@ -298,7 +298,7 @@ "vst2.16 {q0,q1}, [%[dst]]!\n\t" "bgt 1b\n\t" // output operands (or input operands that get modified) - : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) + : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) : // input operands : "memory", "cc" // clobber list ); @@ -399,13 +399,14 @@ #define SAMPLES 1019 #define TIMES 500000 +#define ALIGN 1 static void run_test_mono_to_stereo_float(void) { - float stereo_a9[2*SAMPLES]; - float stereo_a8[2*SAMPLES]; - float stereo_ref[2*SAMPLES]; - float stereo_gen[2*SAMPLES]; - float mono[SAMPLES]; + float stereo_a9[2*SAMPLES+ALIGN]; + float stereo_a8[2*SAMPLES+ALIGN]; + float stereo_ref[2*SAMPLES+ALIGN]; + float stereo_gen[2*SAMPLES+ALIGN]; + float mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -419,7 +420,7 @@ memset(stereo_a9, 0, sizeof(stereo_a9)); memset(stereo_a8, 0, sizeof(stereo_a8)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { mono[i] = rand()/(float) RAND_MAX - 0.5f; } @@ -434,24 +435,24 @@ remap.map_table_f[0][0] = 1.0; remap.map_table_f[1][0] = 1.0; - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) { pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) { pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) { pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i], mono[i/2]); @@ -460,38 +461,38 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); } static void run_test_stereo_to_mono_float(void) { - float stereo[2*SAMPLES]; - float mono_ref[SAMPLES]; - float mono_gen[SAMPLES]; - float mono[SAMPLES]; + float stereo[2*SAMPLES+ALIGN]; + float mono_ref[SAMPLES+ALIGN]; + float mono_gen[SAMPLES+ALIGN]; + float mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -503,7 +504,7 @@ memset(mono_ref, 0, sizeof(mono_ref)); memset(mono, 0, sizeof(mono)); - for (i = 0; i < 2*SAMPLES; i++) { + for (i = 0; i < 2*SAMPLES+ALIGN; i++) { stereo[i] = rand()/(float) RAND_MAX - 0.5f; } @@ -518,11 +519,11 @@ remap.map_table_f[0][0] = 1.0; remap.map_table_f[0][1] = 1.0; - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (fabsf(mono[i] - mono_ref[i]) > 0.00001) { pa_log_debug("%d: %.3f != %.3f (%.3f %0.3f)", i, mono[i], mono_ref[i], stereo[2*i+0], stereo[2*i+1]); @@ -531,32 +532,32 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); } static void run_test_mono_to_stereo_s16(void) { - int16_t stereo_a9[2*SAMPLES]; - int16_t stereo_a8[2*SAMPLES]; - int16_t stereo_ref[2*SAMPLES]; - int16_t stereo_gen[2*SAMPLES]; - int16_t mono[SAMPLES]; + int16_t stereo_a9[2*SAMPLES+ALIGN]; + int16_t stereo_a8[2*SAMPLES+ALIGN]; + int16_t stereo_ref[2*SAMPLES+ALIGN]; + int16_t stereo_gen[2*SAMPLES+ALIGN]; + int16_t mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -570,7 +571,7 @@ memset(stereo_a8, 0, sizeof(stereo_a8)); memset(stereo_gen, 0, sizeof(stereo_gen)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { mono[i] = rand() - RAND_MAX/2; } @@ -584,26 +585,26 @@ remap.o_ss = &oss; remap.map_table_i[0][0] = 0x10000; remap.map_table_i[1][0] = 0x10000; - - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); - - for (i = 0; i < 2*SAMPLES; i++) { + + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); + + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (abs(stereo_a9[i] - stereo_ref[i]) > 0) { pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (abs(stereo_a8[i] - stereo_ref[i]) > 0) { pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (abs(stereo_gen[i] - stereo_ref[i]) > 0) { pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i], mono[i/2]); @@ -612,38 +613,38 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); } static void run_test_stereo_to_mono_s16(void) { - int16_t stereo[2*SAMPLES]; - int16_t mono_ref[SAMPLES]; - int16_t mono_gen[SAMPLES]; - int16_t mono[SAMPLES]; + int16_t stereo[2*SAMPLES+ALIGN]; + int16_t mono_ref[SAMPLES+ALIGN]; + int16_t mono_gen[SAMPLES+ALIGN]; + int16_t mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -656,7 +657,7 @@ memset(mono_gen, 0, sizeof(mono_gen)); memset(mono, 0, sizeof(mono)); - for (i = 0; i < 2*SAMPLES; i++) { + for (i = 0; i < 2*SAMPLES+ALIGN; i++) { stereo[i] = rand() - RAND_MAX/2; } @@ -670,18 +671,18 @@ remap.o_ss = &oss; remap.map_table_i[0][0] = 0x10000; remap.map_table_i[0][1] = 0x10000; - - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); - for (i = 0; i < SAMPLES; i++) { + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); + + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(mono[i] - mono_ref[i]) > 0) { pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_ref[i], stereo[2*i+0], stereo[2*i+1]); } } - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(mono[i] - mono_gen[i]) > 0) { pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_gen[i], stereo[2*i+0], stereo[2*i+1]); @@ -690,21 +691,21 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); diff -r 1f6289166006 -r 07763f536182 sconv_neon.c --- a/sconv_neon.c Sun Jul 08 21:03:41 2012 +0200 +++ b/sconv_neon.c Sun Jul 08 21:48:08 2012 +0200 @@ -109,7 +109,7 @@ "1:\n\t" "vld1.16 {d0}, [%[src]]!\n\t" "vmovl.s16 q0, d0\n\t" - + "vcvt.f32.s32 q0, q0\n\t" "vmul.f32 q0, q0, q1\n\t" @@ -130,11 +130,12 @@ #define SAMPLES 1019 #define TIMES 100000 +#define ALIGN 1 static void run_test_from(void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - float floats[SAMPLES]; + int16_t samples[SAMPLES+ALIGN]; + int16_t samples_ref[SAMPLES+ALIGN]; + float floats[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; @@ -143,14 +144,14 @@ memset(samples_ref, 0, sizeof(samples_ref)); memset(samples, 0, sizeof(samples)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); } - pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); - pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); + pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); + pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(samples[i] - samples_ref[i]) > 0) { pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], floats[i]); @@ -159,41 +160,39 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); + pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); } stop = pa_rtclock_now(); pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); + pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); } stop = pa_rtclock_now(); pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); } static void run_test_to(void) { - int16_t samples[SAMPLES]; - float floats[SAMPLES]; - float floats_ref[SAMPLES]; + int16_t samples[SAMPLES+ALIGN]; + float floats[SAMPLES+ALIGN]; + float floats_ref[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; - pa_convert_func_t func; pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); memset(floats_ref, 0, sizeof(floats_ref)); memset(floats, 0, sizeof(float)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { samples[i] = rand() - RAND_MAX/2; } - func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; - func(SAMPLES, samples, floats_ref); - pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); + pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); + pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], samples[i]); @@ -202,14 +201,14 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); + pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); } stop = pa_rtclock_now(); pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - func(SAMPLES, samples, floats_ref); + pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); } stop = pa_rtclock_now(); pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); diff -r 1f6289166006 -r 07763f536182 svolume_neon.c --- a/svolume_neon.c Sun Jul 08 21:03:41 2012 +0200 +++ b/svolume_neon.c Sun Jul 08 21:48:08 2012 +0200 @@ -340,18 +340,19 @@ #define TIMES 50000 #define CHANNELS 4 #define PADDING 16 +#define ALIGN 1 static void run_test_float(void) { - float floats[SAMPLES]; - float floats_ref[SAMPLES]; - float floats_orig[SAMPLES]; + float floats[SAMPLES+ALIGN]; + float floats_ref[SAMPLES+ALIGN]; + float floats_orig[SAMPLES+ALIGN]; float volumes[CHANNELS]; unsigned i; pa_usec_t start, stop; pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { floats_orig[i] = rand()/(float) RAND_MAX - 0.5f; } memcpy(floats_ref, floats_orig, sizeof(floats_orig)); @@ -360,10 +361,10 @@ for (i = 0; i < CHANNELS; i++) volumes[i] = 0.5f * rand() / (float) RAND_MAX; - pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); - pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); + pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats)); + pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref)); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i], floats_orig[i]); @@ -373,7 +374,7 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { memcpy(floats, floats_orig, sizeof(floats_orig)); - pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); + pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats)); } stop = pa_rtclock_now(); pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); @@ -381,16 +382,16 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { memcpy(floats_ref, floats_orig, sizeof(floats_orig)); - pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); + pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref)); } stop = pa_rtclock_now(); pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); } static void run_test_s16(void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - int16_t samples_orig[SAMPLES]; + int16_t samples[SAMPLES+ALIGN]; + int16_t samples_ref[SAMPLES+ALIGN]; + int16_t samples_orig[SAMPLES+ALIGN]; uint32_t volumes[CHANNELS + PADDING]; unsigned i, padding; pa_usec_t start, stop; @@ -408,20 +409,20 @@ for (padding = 0; padding < PADDING; padding++, i++) volumes[i] = volumes[padding]; - pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); - pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref)); + pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); + pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, sizeof(samples_ref)); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(samples[i] - samples_ref[i]) > 0) { pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i], samples_orig[i]); } } -exit(0); + start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { memcpy(samples, samples_orig, sizeof(samples_orig)); - pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); + pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); } stop = pa_rtclock_now(); pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); @@ -429,7 +430,7 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { memcpy(samples, samples_orig, sizeof(samples_orig)); - pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples)); + pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); } stop = pa_rtclock_now(); pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start));