# HG changeset patch
# User Peter Meerwald
# Date 1341776888 -7200
# Node ID 07763f53618212244b45358e7bb2736f3be23ac2
# Parent 1f6289166006786b83c76ae4937aacb58ba7edd4
Offset NEON test buffers by ALIGN elements to exercise unaligned access
diff -r 1f6289166006 -r 07763f536182 remap_neon.c
--- a/remap_neon.c Sun Jul 08 21:03:41 2012 +0200
+++ b/remap_neon.c Sun Jul 08 21:48:08 2012 +0200
@@ -239,7 +239,7 @@
static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) {
int i = n & 3;
-
+
asm volatile (
"mov %[n], %[n], lsr #2\n\t"
"1:\n\t"
@@ -288,7 +288,7 @@
static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) {
int i = n & 7;
-
+
asm volatile (
"mov %[n], %[n], lsr #3\n\t"
"1:\n\t"
@@ -298,7 +298,7 @@
"vst2.16 {q0,q1}, [%[dst]]!\n\t"
"bgt 1b\n\t"
// output operands (or input operands that get modified)
- : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
+ : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
: // input operands
: "memory", "cc" // clobber list
);
@@ -399,13 +399,14 @@
#define SAMPLES 1019
#define TIMES 500000
+#define ALIGN 1
static void run_test_mono_to_stereo_float(void) {
- float stereo_a9[2*SAMPLES];
- float stereo_a8[2*SAMPLES];
- float stereo_ref[2*SAMPLES];
- float stereo_gen[2*SAMPLES];
- float mono[SAMPLES];
+ float stereo_a9[2*SAMPLES+ALIGN];
+ float stereo_a8[2*SAMPLES+ALIGN];
+ float stereo_ref[2*SAMPLES+ALIGN];
+ float stereo_gen[2*SAMPLES+ALIGN];
+ float mono[SAMPLES+ALIGN];
int i;
pa_usec_t start, stop;
pa_sample_format_t sf;
@@ -419,7 +420,7 @@
memset(stereo_a9, 0, sizeof(stereo_a9));
memset(stereo_a8, 0, sizeof(stereo_a8));
- for (i = 0; i < SAMPLES; i++) {
+ for (i = 0; i < SAMPLES+ALIGN; i++) {
mono[i] = rand()/(float) RAND_MAX - 0.5f;
}
@@ -434,24 +435,24 @@
remap.map_table_f[0][0] = 1.0;
remap.map_table_f[1][0] = 1.0;
- remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
- remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
- remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
- remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+ remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
+ remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
+ remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
+ remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) {
pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i],
- mono[i/2]);
+ mono[ALIGN + (i-ALIGN)/2]); /* source sample of stereo[i] now that both buffers start at ALIGN */
}
}
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) {
pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i],
- mono[i/2]);
+ mono[ALIGN + (i-ALIGN)/2]); /* source sample of stereo[i] now that both buffers start at ALIGN */
}
}
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) {
pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i],
- mono[i/2]);
+ mono[ALIGN + (i-ALIGN)/2]); /* source sample of stereo[i] now that both buffers start at ALIGN */
@@ -460,38 +461,38 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+ remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
+ remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
+ remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+ remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
}
static void run_test_stereo_to_mono_float(void) {
- float stereo[2*SAMPLES];
- float mono_ref[SAMPLES];
- float mono_gen[SAMPLES];
- float mono[SAMPLES];
+ float stereo[2*SAMPLES+ALIGN];
+ float mono_ref[SAMPLES+ALIGN];
+ float mono_gen[SAMPLES+ALIGN];
+ float mono[SAMPLES+ALIGN];
int i;
pa_usec_t start, stop;
pa_sample_format_t sf;
@@ -503,7 +504,7 @@
memset(mono_ref, 0, sizeof(mono_ref));
memset(mono, 0, sizeof(mono));
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = 0; i < 2*SAMPLES+ALIGN; i++) {
stereo[i] = rand()/(float) RAND_MAX - 0.5f;
}
@@ -518,11 +519,11 @@
remap.map_table_f[0][0] = 1.0;
remap.map_table_f[0][1] = 1.0;
- remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
- remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
- remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+ remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
+ remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
+ remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
- for (i = 0; i < SAMPLES; i++) {
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (fabsf(mono[i] - mono_ref[i]) > 0.00001) {
pa_log_debug("%d: %.3f != %.3f (%.3f %0.3f)", i, mono[i], mono_ref[i],
- stereo[2*i+0], stereo[2*i+1]);
+ stereo[ALIGN+2*(i-ALIGN)+0], stereo[ALIGN+2*(i-ALIGN)+1]); /* input pair of mono[i]; 2*i+1 indexed past the end of stereo[] */
@@ -531,32 +532,32 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+ remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+ remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+ remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
}
static void run_test_mono_to_stereo_s16(void) {
- int16_t stereo_a9[2*SAMPLES];
- int16_t stereo_a8[2*SAMPLES];
- int16_t stereo_ref[2*SAMPLES];
- int16_t stereo_gen[2*SAMPLES];
- int16_t mono[SAMPLES];
+ int16_t stereo_a9[2*SAMPLES+ALIGN];
+ int16_t stereo_a8[2*SAMPLES+ALIGN];
+ int16_t stereo_ref[2*SAMPLES+ALIGN];
+ int16_t stereo_gen[2*SAMPLES+ALIGN];
+ int16_t mono[SAMPLES+ALIGN];
int i;
pa_usec_t start, stop;
pa_sample_format_t sf;
@@ -570,7 +571,7 @@
memset(stereo_a8, 0, sizeof(stereo_a8));
memset(stereo_gen, 0, sizeof(stereo_gen));
- for (i = 0; i < SAMPLES; i++) {
+ for (i = 0; i < SAMPLES+ALIGN; i++) {
mono[i] = rand() - RAND_MAX/2;
}
@@ -584,26 +585,26 @@
remap.o_ss = &oss;
remap.map_table_i[0][0] = 0x10000;
remap.map_table_i[1][0] = 0x10000;
-
- remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
- remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
- remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
- remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
-
- for (i = 0; i < 2*SAMPLES; i++) {
+
+ remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
+ remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
+ remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
+ remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
+
+ for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
if (abs(stereo_a9[i] - stereo_ref[i]) > 0) {
pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i],
- mono[i/2]);
+ mono[ALIGN + (i-ALIGN)/2]); /* source sample of stereo[i] now that both buffers start at ALIGN */
}
}
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
if (abs(stereo_a8[i] - stereo_ref[i]) > 0) {
pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i],
- mono[i/2]);
+ mono[ALIGN + (i-ALIGN)/2]); /* source sample of stereo[i] now that both buffers start at ALIGN */
}
}
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
if (abs(stereo_gen[i] - stereo_ref[i]) > 0) {
pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i],
- mono[i/2]);
+ mono[ALIGN + (i-ALIGN)/2]); /* source sample of stereo[i] now that both buffers start at ALIGN */
@@ -612,38 +613,38 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
+ remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
+ remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+ remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+ remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
}
static void run_test_stereo_to_mono_s16(void) {
- int16_t stereo[2*SAMPLES];
- int16_t mono_ref[SAMPLES];
- int16_t mono_gen[SAMPLES];
- int16_t mono[SAMPLES];
+ int16_t stereo[2*SAMPLES+ALIGN];
+ int16_t mono_ref[SAMPLES+ALIGN];
+ int16_t mono_gen[SAMPLES+ALIGN];
+ int16_t mono[SAMPLES+ALIGN];
int i;
pa_usec_t start, stop;
pa_sample_format_t sf;
@@ -656,7 +657,7 @@
memset(mono_gen, 0, sizeof(mono_gen));
memset(mono, 0, sizeof(mono));
- for (i = 0; i < 2*SAMPLES; i++) {
+ for (i = 0; i < 2*SAMPLES+ALIGN; i++) {
stereo[i] = rand() - RAND_MAX/2;
}
@@ -670,18 +671,18 @@
remap.o_ss = &oss;
remap.map_table_i[0][0] = 0x10000;
remap.map_table_i[0][1] = 0x10000;
-
- remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
- remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
- remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
- for (i = 0; i < SAMPLES; i++) {
+ remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
+ remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
+ remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
+
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (abs(mono[i] - mono_ref[i]) > 0) {
- pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_ref[i],
- stereo[2*i+0], stereo[2*i+1]);
+ pa_log_debug("%d: %d != %d (%d %d)", i, mono[i], mono_ref[i],
+ stereo[ALIGN+2*(i-ALIGN)+0], stereo[ALIGN+2*(i-ALIGN)+1]); /* input pair of mono[i]; 2*i+1 indexed past the end of stereo[] */
}
}
- for (i = 0; i < SAMPLES; i++) {
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (abs(mono[i] - mono_gen[i]) > 0) {
- pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_gen[i],
- stereo[2*i+0], stereo[2*i+1]);
+ pa_log_debug("%d: %d != %d (%d %d)", i, mono[i], mono_gen[i],
+ stereo[ALIGN+2*(i-ALIGN)+0], stereo[ALIGN+2*(i-ALIGN)+1]); /* input pair of mono[i]; 2*i+1 indexed past the end of stereo[] */
@@ -690,21 +691,21 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+ remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+ remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+ remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
diff -r 1f6289166006 -r 07763f536182 sconv_neon.c
--- a/sconv_neon.c Sun Jul 08 21:03:41 2012 +0200
+++ b/sconv_neon.c Sun Jul 08 21:48:08 2012 +0200
@@ -109,7 +109,7 @@
"1:\n\t"
"vld1.16 {d0}, [%[src]]!\n\t"
"vmovl.s16 q0, d0\n\t"
-
+
"vcvt.f32.s32 q0, q0\n\t"
"vmul.f32 q0, q0, q1\n\t"
@@ -130,11 +130,12 @@
#define SAMPLES 1019
#define TIMES 100000
+#define ALIGN 1
static void run_test_from(void) {
- int16_t samples[SAMPLES];
- int16_t samples_ref[SAMPLES];
- float floats[SAMPLES];
+ int16_t samples[SAMPLES+ALIGN];
+ int16_t samples_ref[SAMPLES+ALIGN];
+ float floats[SAMPLES+ALIGN];
int i;
pa_usec_t start, stop;
@@ -143,14 +144,14 @@
memset(samples_ref, 0, sizeof(samples_ref));
memset(samples, 0, sizeof(samples));
- for (i = 0; i < SAMPLES; i++) {
+ for (i = 0; i < SAMPLES+ALIGN; i++) {
floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f);
}
- pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref);
- pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples);
+ pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN);
+ pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN);
- for (i = 0; i < SAMPLES; i++) {
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (abs(samples[i] - samples_ref[i]) > 0) {
pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i],
floats[i]);
@@ -159,41 +160,39 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples);
+ pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN);
}
stop = pa_rtclock_now();
pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref);
+ pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN);
}
stop = pa_rtclock_now();
pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}
static void run_test_to(void) {
- int16_t samples[SAMPLES];
- float floats[SAMPLES];
- float floats_ref[SAMPLES];
+ int16_t samples[SAMPLES+ALIGN];
+ float floats[SAMPLES+ALIGN];
+ float floats_ref[SAMPLES+ALIGN];
int i;
pa_usec_t start, stop;
- pa_convert_func_t func;
pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES);
memset(floats_ref, 0, sizeof(floats_ref));
- memset(floats, 0, sizeof(float));
+ memset(floats, 0, sizeof(floats)); /* clear the whole buffer, not just floats[0] */
- for (i = 0; i < SAMPLES; i++) {
+ for (i = 0; i < SAMPLES+ALIGN; i++) {
samples[i] = rand() - RAND_MAX/2;
}
- func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne;
- func(SAMPLES, samples, floats_ref);
- pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats);
+ pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN);
+ pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN);
- for (i = 0; i < SAMPLES; i++) {
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i],
samples[i]);
@@ -202,14 +201,14 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats);
+ pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN);
}
stop = pa_rtclock_now();
pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- func(SAMPLES, samples, floats_ref);
+ pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN);
}
stop = pa_rtclock_now();
pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
diff -r 1f6289166006 -r 07763f536182 svolume_neon.c
--- a/svolume_neon.c Sun Jul 08 21:03:41 2012 +0200
+++ b/svolume_neon.c Sun Jul 08 21:48:08 2012 +0200
@@ -340,18 +340,19 @@
#define TIMES 50000
#define CHANNELS 4
#define PADDING 16
+#define ALIGN 1
static void run_test_float(void) {
- float floats[SAMPLES];
- float floats_ref[SAMPLES];
- float floats_orig[SAMPLES];
+ float floats[SAMPLES+ALIGN];
+ float floats_ref[SAMPLES+ALIGN];
+ float floats_orig[SAMPLES+ALIGN];
float volumes[CHANNELS];
unsigned i;
pa_usec_t start, stop;
pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES);
- for (i = 0; i < SAMPLES; i++) {
+ for (i = 0; i < SAMPLES+ALIGN; i++) {
floats_orig[i] = rand()/(float) RAND_MAX - 0.5f;
}
memcpy(floats_ref, floats_orig, sizeof(floats_orig));
@@ -360,10 +361,10 @@
for (i = 0; i < CHANNELS; i++)
volumes[i] = 0.5f * rand() / (float) RAND_MAX;
- pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats));
- pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref));
+ pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float)); /* length excludes the ALIGN lead-in so the call stays inside floats[] */
+ pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float));
- for (i = 0; i < SAMPLES; i++) {
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i],
floats_orig[i]);
@@ -373,7 +374,7 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
memcpy(floats, floats_orig, sizeof(floats_orig));
- pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats));
+ pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float));
}
stop = pa_rtclock_now();
pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
@@ -381,16 +382,16 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
memcpy(floats_ref, floats_orig, sizeof(floats_orig));
- pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref));
+ pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float));
}
stop = pa_rtclock_now();
pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}
static void run_test_s16(void) {
- int16_t samples[SAMPLES];
- int16_t samples_ref[SAMPLES];
- int16_t samples_orig[SAMPLES];
+ int16_t samples[SAMPLES+ALIGN];
+ int16_t samples_ref[SAMPLES+ALIGN];
+ int16_t samples_orig[SAMPLES+ALIGN];
uint32_t volumes[CHANNELS + PADDING];
unsigned i, padding;
pa_usec_t start, stop;
@@ -408,20 +409,20 @@
for (padding = 0; padding < PADDING; padding++, i++)
volumes[i] = volumes[padding];
- pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples));
- pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref));
+ pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t)); /* length excludes the ALIGN lead-in so the call stays inside samples[] */
+ pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t));
- for (i = 0; i < SAMPLES; i++) {
+ for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
if (abs(samples[i] - samples_ref[i]) > 0) {
pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i],
samples_orig[i]);
}
}
-exit(0);
+
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
memcpy(samples, samples_orig, sizeof(samples_orig));
- pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples));
+ pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t));
}
stop = pa_rtclock_now();
pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
@@ -429,7 +430,7 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
memcpy(samples, samples_orig, sizeof(samples_orig));
- pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples));
+ pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t));
}
stop = pa_rtclock_now();
pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start));