Mercurial > hg > pa-neon
diff remap_neon.c @ 5:07763f536182 default tip
ALIGNment support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Sun, 08 Jul 2012 21:48:08 +0200 |
parents | e889fd0e7769 |
children |
line wrap: on
line diff
--- a/remap_neon.c Sun Jul 08 21:03:41 2012 +0200 +++ b/remap_neon.c Sun Jul 08 21:48:08 2012 +0200 @@ -239,7 +239,7 @@ static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) { int i = n & 3; - + asm volatile ( "mov %[n], %[n], lsr #2\n\t" "1:\n\t" @@ -288,7 +288,7 @@ static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) { int i = n & 7; - + asm volatile ( "mov %[n], %[n], lsr #3\n\t" "1:\n\t" @@ -298,7 +298,7 @@ "vst2.16 {q0,q1}, [%[dst]]!\n\t" "bgt 1b\n\t" // output operands (or input operands that get modified) - : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) + : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) : // input operands : "memory", "cc" // clobber list ); @@ -399,13 +399,14 @@ #define SAMPLES 1019 #define TIMES 500000 +#define ALIGN 1 static void run_test_mono_to_stereo_float(void) { - float stereo_a9[2*SAMPLES]; - float stereo_a8[2*SAMPLES]; - float stereo_ref[2*SAMPLES]; - float stereo_gen[2*SAMPLES]; - float mono[SAMPLES]; + float stereo_a9[2*SAMPLES+ALIGN]; + float stereo_a8[2*SAMPLES+ALIGN]; + float stereo_ref[2*SAMPLES+ALIGN]; + float stereo_gen[2*SAMPLES+ALIGN]; + float mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -419,7 +420,7 @@ memset(stereo_a9, 0, sizeof(stereo_a9)); memset(stereo_a8, 0, sizeof(stereo_a8)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { mono[i] = rand()/(float) RAND_MAX - 0.5f; } @@ -434,24 +435,24 @@ remap.map_table_f[0][0] = 1.0; remap.map_table_f[1][0] = 1.0; - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) { pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) { pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) { pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i], mono[i/2]); @@ -460,38 +461,38 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); } static void run_test_stereo_to_mono_float(void) { - float stereo[2*SAMPLES]; - float mono_ref[SAMPLES]; - float mono_gen[SAMPLES]; - float mono[SAMPLES]; + float stereo[2*SAMPLES+ALIGN]; + float mono_ref[SAMPLES+ALIGN]; + float mono_gen[SAMPLES+ALIGN]; + float mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -503,7 +504,7 @@ memset(mono_ref, 0, sizeof(mono_ref)); memset(mono, 0, sizeof(mono)); - for (i = 0; i < 2*SAMPLES; i++) { + for (i = 0; i < 2*SAMPLES+ALIGN; i++) { stereo[i] = rand()/(float) RAND_MAX - 0.5f; } @@ -518,11 +519,11 @@ remap.map_table_f[0][0] = 1.0; remap.map_table_f[0][1] = 1.0; - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (fabsf(mono[i] - mono_ref[i]) > 0.00001) { pa_log_debug("%d: %.3f != %.3f (%.3f %0.3f)", i, mono[i], mono_ref[i], stereo[2*i+0], stereo[2*i+1]); @@ -531,32 +532,32 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); } static void run_test_mono_to_stereo_s16(void) { - int16_t stereo_a9[2*SAMPLES]; - int16_t stereo_a8[2*SAMPLES]; - int16_t stereo_ref[2*SAMPLES]; - int16_t stereo_gen[2*SAMPLES]; - int16_t mono[SAMPLES]; + int16_t stereo_a9[2*SAMPLES+ALIGN]; + int16_t stereo_a8[2*SAMPLES+ALIGN]; + int16_t stereo_ref[2*SAMPLES+ALIGN]; + int16_t stereo_gen[2*SAMPLES+ALIGN]; + int16_t mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -570,7 +571,7 @@ memset(stereo_a8, 0, sizeof(stereo_a8)); memset(stereo_gen, 0, sizeof(stereo_gen)); - for (i = 0; i < SAMPLES; i++) { + for (i = 0; i < SAMPLES+ALIGN; i++) { mono[i] = rand() - RAND_MAX/2; } @@ -584,26 +585,26 @@ remap.o_ss = &oss; remap.map_table_i[0][0] = 0x10000; remap.map_table_i[1][0] = 0x10000; - - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); - - for (i = 0; i < 2*SAMPLES; i++) { + + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); + + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (abs(stereo_a9[i] - stereo_ref[i]) > 0) { pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (abs(stereo_a8[i] - stereo_ref[i]) > 0) { pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i], mono[i/2]); } } - for (i = 0; i < 2*SAMPLES; i++) { + for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) { if (abs(stereo_gen[i] - stereo_ref[i]) > 0) { pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i], mono[i/2]); @@ -612,38 +613,38 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); + remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); + remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); + remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); + remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); } static void run_test_stereo_to_mono_s16(void) { - int16_t stereo[2*SAMPLES]; - int16_t mono_ref[SAMPLES]; - int16_t mono_gen[SAMPLES]; - int16_t mono[SAMPLES]; + int16_t stereo[2*SAMPLES+ALIGN]; + int16_t mono_ref[SAMPLES+ALIGN]; + int16_t mono_gen[SAMPLES+ALIGN]; + int16_t mono[SAMPLES+ALIGN]; int i; pa_usec_t start, stop; pa_sample_format_t sf; @@ -656,7 +657,7 @@ memset(mono_gen, 0, sizeof(mono_gen)); memset(mono, 0, sizeof(mono)); - for (i = 0; i < 2*SAMPLES; i++) { + for (i = 0; i < 2*SAMPLES+ALIGN; i++) { stereo[i] = rand() - RAND_MAX/2; } @@ -670,18 +671,18 @@ remap.o_ss = &oss; remap.map_table_i[0][0] = 0x10000; remap.map_table_i[0][1] = 0x10000; - - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); - for (i = 0; i < SAMPLES; i++) { + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); + + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(mono[i] - mono_ref[i]) > 0) { pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_ref[i], stereo[2*i+0], stereo[2*i+1]); } } - for (i = 0; i < SAMPLES; i++) { + for (i = ALIGN; i < SAMPLES+ALIGN; i++) { if (abs(mono[i] - mono_gen[i]) > 0) { pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_gen[i], stereo[2*i+0], stereo[2*i+1]); @@ -690,21 +691,21 @@ start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); + remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); + remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); start = pa_rtclock_now(); for (i = 0; i < TIMES; i++) { - remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); + remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES); } stop = pa_rtclock_now(); pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));