# HG changeset patch
# User Peter Meerwald
# Date 1334924774 -7200
# Node ID b829afbea564ccfc4ec54e75505c396c61ddaf0d
# Parent e0040ee59c3c88b43a3ca2826188a2db74b15b51
more testing
diff -r e0040ee59c3c -r b829afbea564 remap_neon.c
--- a/remap_neon.c Thu Jan 12 17:27:46 2012 +0100
+++ b/remap_neon.c Fri Apr 20 14:26:14 2012 +0200
@@ -10,18 +10,31 @@
#include
#include
+typedef unsigned char uint8_t;
+typedef short int16_t;
+typedef unsigned int uint32_t;
-typedef short int16_t;
typedef enum pa_sample_format {
PA_SAMPLE_S16LE,
PA_SAMPLE_FLOAT32LE,
} pa_sample_format_t;
#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE
+
+typedef struct pa_sample_spec {
+ pa_sample_format_t format;
+ uint32_t rate;
+ uint8_t channels;
+} pa_sample_spec;
+
+#define PA_CHANNELS_MAX 32
typedef struct {
pa_sample_format_t *format;
+ pa_sample_spec *i_ss, *o_ss;
+ float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+ int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
} pa_remap_t;
-typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);
+
typedef long long unsigned int pa_usec_t;
#define pa_assert(x) assert(x)
@@ -48,6 +61,80 @@
return tv.tv_sec * 1000000ULL + tv.tv_usec;
}
+static void remap_channels_matrix_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ unsigned oc, ic, i;
+ unsigned n_ic, n_oc;
+
+ n_ic = m->i_ss->channels;
+ n_oc = m->o_ss->channels;
+
+ switch (*m->format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ float *d, *s;
+
+ memset(dst, 0, n * sizeof(float) * n_oc);
+
+ for (oc = 0; oc < n_oc; oc++) {
+
+ for (ic = 0; ic < n_ic; ic++) {
+ float vol;
+
+ vol = m->map_table_f[oc][ic];
+
+ if (vol <= 0.0)
+ continue;
+
+ d = (float *)dst + oc;
+ s = (float *)src + ic;
+
+ if (vol >= 1.0) {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s * vol;
+ }
+ }
+ }
+
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ int16_t *d, *s;
+
+ memset(dst, 0, n * sizeof(int16_t) * n_oc);
+
+ for (oc = 0; oc < n_oc; oc++) {
+
+ for (ic = 0; ic < n_ic; ic++) {
+ int32_t vol;
+
+ vol = m->map_table_i[oc][ic];
+
+ if (vol <= 0)
+ continue;
+
+ d = (int16_t *)dst + oc;
+ s = (int16_t *)src + ic;
+
+ if (vol >= 0x10000) {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += *s;
+ } else {
+ for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+ *d += (int16_t) (((int32_t)*s * vol) >> 16);
+ }
+ }
+ }
+ break;
+ }
+ default:
+ pa_assert_not_reached();
+ }
+}
+
static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
unsigned i;
@@ -101,6 +188,54 @@
}
}
+
+
+static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ unsigned i;
+
+ switch (*m->format) {
+ case PA_SAMPLE_FLOAT32NE:
+ {
+ float *d = (float *) dst, *s = (float *) src;
+
+ for (i = n >> 2; i > 0; i--) {
+ d[0] = s[0] + s[1];
+ d[1] = s[2] + s[3];
+ d[2] = s[4] + s[5];
+ d[3] = s[6] + s[7];
+ s += 8;
+ d += 4;
+ }
+ for (i = n & 3; i; i--) {
+ d[0] = s[0] + s[1];
+ s += 2;
+ d += 1;
+ }
+ break;
+ }
+ case PA_SAMPLE_S16NE:
+ {
+ int16_t *d = (int16_t *) dst, *s = (int16_t *) src;
+
+ for (i = n >> 2; i > 0; i--) {
+ *d++ += s[0] + s[1];
+ *d++ += s[2] + s[3];
+ *d++ += s[4] + s[5];
+ *d++ += s[6] + s[7];
+ s += 8;
+ }
+ for (i = n & 3; i; i--) {
+ *d++ += s[0] + s[1];
+ s += 2;
+ }
+ break;
+ }
+ default:
+ pa_assert_not_reached();
+ }
+}
+
+
#if defined(__arm__)
#include "arm_neon.h"
@@ -153,17 +288,66 @@
}
}
+/* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */
+static __attribute__ ((noinline)) void stereo_to_mono_float(float *d, const float *s, unsigned n) {
+ unsigned i;
+
+ for (i = 0; i < n/4; i++) {
+ float32x4x2_t stereo = vld2q_f32(s);
+ float32x4_t mono = vaddq_f32(stereo.val[0], stereo.val[1]);
+ vst1q_f32(d, mono);
+ s += 8;
+ d += 4;
+ }
+ for (i = n & ~3; i < n; i++) {
+ d[0] = s[0] + s[1];
+ s += 2;
+ d++;
+ }
+}
+
+/* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */
+static __attribute__ ((noinline)) void stereo_to_mono_int16(int16_t *d, const int16_t *s, unsigned n) {
+ unsigned int i;
+
+ for (i = 0; i < n/8; i++) {
+ int16x8x2_t stereo = vld2q_s16(s);
+ int16x8_t mono = vaddq_s16(stereo.val[0], stereo.val[1]);
+ vst1q_s16(d, mono);
+ s += 16;
+ d += 8;
+ }
+ for (i = n & ~7; i < n; i++) {
+ d[0] = s[0] + s[1];
+ s += 2;
+ d++;
+ }
+}
+
+static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+ switch (*m->format) {
+ case PA_SAMPLE_FLOAT32NE:
+ stereo_to_mono_float(dst, src, n);
+ break;
+ case PA_SAMPLE_S16NE:
+ stereo_to_mono_int16(dst, src, n);
+ break;
+ default:
+ pa_assert_not_reached();
+ }
+}
#define SAMPLES 1019
#define TIMES 10000
-static void run_test_float(void) {
+static void run_test_mono_to_stereo_float(void) {
float stereo[2*SAMPLES];
float stereo_ref[2*SAMPLES];
+ float stereo_gen[2*SAMPLES];
float mono[SAMPLES];
int i;
pa_usec_t start, stop;
- pa_remap_func_t func;
pa_sample_format_t sf;
+ pa_sample_spec iss, oss;
pa_remap_t remap;
pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);
@@ -177,8 +361,17 @@
sf = PA_SAMPLE_FLOAT32NE;
remap.format = &sf;
- func = (pa_remap_func_t) remap_mono_to_stereo_c;
- func(&remap, stereo_ref, mono, SAMPLES);
+ iss.format = PA_SAMPLE_FLOAT32NE;
+ iss.channels = 1;
+ oss.format = PA_SAMPLE_FLOAT32NE;
+ oss.channels = 2;
+ remap.i_ss = &iss;
+ remap.o_ss = &oss;
+ remap.map_table_f[0][0] = 1.0;
+ remap.map_table_f[1][0] = 1.0;
+
+ remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+ remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
for (i = 0; i < 2*SAMPLES; i++) {
@@ -187,6 +380,12 @@
mono[i/2]);
}
}
+ for (i = 0; i < 2*SAMPLES; i++) {
+ if (fabsf(stereo[i] - stereo_gen[i]) > 0.00001) {
+ pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_gen[i],
+ mono[i/2]);
+ }
+ }
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
@@ -197,20 +396,92 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- func(&remap, stereo_ref, mono, SAMPLES);
+ remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
}
-static void run_test_s16(void) {
+static void run_test_stereo_to_mono_float(void) {
+ float stereo[2*SAMPLES];
+ float mono_ref[SAMPLES];
+ float mono_gen[SAMPLES];
+ float mono[SAMPLES];
+ int i;
+ pa_usec_t start, stop;
+ pa_sample_format_t sf;
+ pa_sample_spec iss, oss;
+ pa_remap_t remap;
+
+ pa_log_debug("checking NEON remap_stereo_to_mono(float, %d)", SAMPLES);
+
+ memset(mono_ref, 0, sizeof(mono_ref));
+ memset(mono, 0, sizeof(mono));
+
+ for (i = 0; i < 2*SAMPLES; i++) {
+ stereo[i] = rand()/(float) RAND_MAX - 0.5f;
+ }
+
+ sf = PA_SAMPLE_FLOAT32NE;
+ remap.format = &sf;
+ iss.format = PA_SAMPLE_FLOAT32NE;
+ iss.channels = 2;
+ oss.format = PA_SAMPLE_FLOAT32NE;
+ oss.channels = 1;
+ remap.i_ss = &iss;
+ remap.o_ss = &oss;
+ remap.map_table_f[0][0] = 1.0;
+ remap.map_table_f[0][1] = 1.0;
+
+ remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+ remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+ remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+
+ for (i = 0; i < SAMPLES; i++) {
+ if (fabsf(mono[i] - mono_ref[i]) > 0.00001) {
+ pa_log_debug("%d: %.3f != %.3f (%.3f %0.3f)", i, mono[i], mono_ref[i],
+ stereo[2*i+0], stereo[2*i+1]);
+ }
+ }
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
+}
+
+static void run_test_mono_to_stereo_s16(void) {
int16_t stereo[2*SAMPLES];
int16_t stereo_ref[2*SAMPLES];
+ int16_t stereo_gen[2*SAMPLES];
int16_t mono[SAMPLES];
int i;
pa_usec_t start, stop;
- pa_remap_func_t func;
pa_sample_format_t sf;
+ pa_sample_spec iss, oss;
pa_remap_t remap;
pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);
@@ -224,10 +495,19 @@
sf = PA_SAMPLE_S16NE;
remap.format = &sf;
- func = (pa_remap_func_t) remap_mono_to_stereo_c;
- func(&remap, stereo_ref, mono, SAMPLES);
+ iss.format = PA_SAMPLE_S16NE;
+ iss.channels = 1;
+ oss.format = PA_SAMPLE_S16NE;
+ oss.channels = 2;
+ remap.i_ss = &iss;
+ remap.o_ss = &oss;
+ remap.map_table_f[0][0] = 1.0;
+ remap.map_table_f[1][0] = 1.0;
+
+ remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+ remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
-
+
for (i = 0; i < 2*SAMPLES; i++) {
if (abs(stereo[i] - stereo_ref[i]) > 0) {
pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i],
@@ -235,6 +515,13 @@
}
}
+ for (i = 0; i < 2*SAMPLES; i++) {
+ if (abs(stereo[i] - stereo_gen[i]) > 0) {
+ pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_gen[i],
+ mono[i/2]);
+ }
+ }
+
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
@@ -244,18 +531,100 @@
start = pa_rtclock_now();
for (i = 0; i < TIMES; i++) {
- func(&remap, stereo_ref, mono, SAMPLES);
+ remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
}
stop = pa_rtclock_now();
pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
}
+static void run_test_stereo_to_mono_s16(void) {
+ int16_t stereo[2*SAMPLES];
+ int16_t mono_ref[SAMPLES];
+ int16_t mono_gen[SAMPLES];
+ int16_t mono[SAMPLES];
+ int i;
+ pa_usec_t start, stop;
+ pa_sample_format_t sf;
+ pa_sample_spec iss, oss;
+ pa_remap_t remap;
+
+ pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES);
+
+ memset(mono_ref, 0, sizeof(mono_ref));
+ memset(mono, 0, sizeof(mono));
+
+ for (i = 0; i < 2*SAMPLES; i++) {
+ stereo[i] = rand() - RAND_MAX/2;
+ }
+
+ sf = PA_SAMPLE_S16NE;
+ remap.format = &sf;
+ iss.format = PA_SAMPLE_S16NE;
+ iss.channels = 2;
+ oss.format = PA_SAMPLE_S16NE;
+ oss.channels = 1;
+ remap.i_ss = &iss;
+ remap.o_ss = &oss;
+ remap.map_table_f[0][0] = 1.0;
+ remap.map_table_f[0][1] = 1.0;
+
+ remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+ remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+ remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+
+ for (i = 0; i < SAMPLES; i++) {
+ if (abs(mono[i] - mono_ref[i]) > 0) {
+ pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_ref[i],
+ stereo[2*i+0], stereo[2*i+1]);
+ }
+ }
+ for (i = 0; i < SAMPLES; i++) {
+ if (abs(mono[i] - mono_gen[i]) > 0) {
+ pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_gen[i],
+ stereo[2*i+0], stereo[2*i+1]);
+ }
+ }
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+ start = pa_rtclock_now();
+ for (i = 0; i < TIMES; i++) {
+ remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+ }
+ stop = pa_rtclock_now();
+ pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
+}
+
+
#endif /* defined(__arm__) */
int main() {
- run_test_float();
- run_test_s16();
+ run_test_stereo_to_mono_float();
+ run_test_stereo_to_mono_s16();
+
+ run_test_mono_to_stereo_float();
+ run_test_mono_to_stereo_s16();
+
return EXIT_SUCCESS;
}
diff -r e0040ee59c3c -r b829afbea564 sconv_neon.c
--- a/sconv_neon.c Thu Jan 12 17:27:46 2012 +0100
+++ b/sconv_neon.c Fri Apr 20 14:26:14 2012 +0200
@@ -103,7 +103,7 @@
}
#define SAMPLES 1019
-#define TIMES 300
+#define TIMES 10000
static void run_test_from(void) {
int16_t samples[SAMPLES];
diff -r e0040ee59c3c -r b829afbea564 svolume_neon.c
--- a/svolume_neon.c Thu Jan 12 17:27:46 2012 +0100
+++ b/svolume_neon.c Fri Apr 20 14:26:14 2012 +0200
@@ -333,7 +333,7 @@
}
#define SAMPLES 1019
-#define TIMES 1000
+#define TIMES 3000
#define CHANNELS 4
#define PADDING 16