changeset 1:b829afbea564

more testing
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Fri, 20 Apr 2012 14:26:14 +0200
parents e0040ee59c3c
children 09ee6a01a3d3
files remap_neon.c sconv_neon.c svolume_neon.c
diffstat 3 files changed, 386 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/remap_neon.c	Thu Jan 12 17:27:46 2012 +0100
+++ b/remap_neon.c	Fri Apr 20 14:26:14 2012 +0200
@@ -10,18 +10,31 @@
 #include <sys/time.h>
 #include <assert.h>
 
+typedef unsigned char uint8_t;
+typedef short int16_t;
+typedef unsigned int uint32_t;
 
-typedef short int16_t;
 typedef enum pa_sample_format {
     PA_SAMPLE_S16LE,
     PA_SAMPLE_FLOAT32LE,
 } pa_sample_format_t;
 #define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
 #define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE
+
+typedef struct pa_sample_spec {
+  pa_sample_format_t format;
+  uint32_t rate;
+  uint8_t channels;
+} pa_sample_spec;
+
+#define PA_CHANNELS_MAX 32
 typedef struct {
     pa_sample_format_t *format;
+    pa_sample_spec *i_ss, *o_ss;
+    float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+    int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
 } pa_remap_t;
-typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);
+
 typedef long long unsigned int pa_usec_t;
 
 #define pa_assert(x) assert(x)
@@ -48,6 +61,80 @@
     return tv.tv_sec * 1000000ULL + tv.tv_usec;
 }
 
+static void remap_channels_matrix_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned oc, ic, i;
+    unsigned n_ic, n_oc;
+
+    n_ic = m->i_ss->channels;
+    n_oc = m->o_ss->channels;
+
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d, *s;
+
+            memset(dst, 0, n * sizeof(float) * n_oc);
+
+            for (oc = 0; oc < n_oc; oc++) {
+
+                for (ic = 0; ic < n_ic; ic++) {
+                    float vol;
+
+                    vol = m->map_table_f[oc][ic];
+
+                    if (vol <= 0.0)
+                        continue;
+
+                    d = (float *)dst + oc;
+                    s = (float *)src + ic;
+
+                    if (vol >= 1.0) {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += *s;
+                    } else {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += *s * vol;
+                    }
+                }
+            }
+
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d, *s;
+
+            memset(dst, 0, n * sizeof(int16_t) * n_oc);
+
+            for (oc = 0; oc < n_oc; oc++) {
+
+                for (ic = 0; ic < n_ic; ic++) {
+                    int32_t vol;
+
+                    vol = m->map_table_i[oc][ic];
+
+                    if (vol <= 0)
+                        continue;
+
+                    d = (int16_t *)dst + oc;
+                    s = (int16_t *)src + ic;
+
+                    if (vol >= 0x10000) {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += *s;
+                    } else {
+                        for (i = n; i > 0; i--, s += n_ic, d += n_oc)
+                            *d += (int16_t) (((int32_t)*s * vol) >> 16);
+                    }
+                }
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
 static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
     unsigned i;
 
@@ -101,6 +188,54 @@
     }
 }
 
+
+
+static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d = (float *) dst, *s = (float *) src;
+
+            for (i = n >> 2; i > 0; i--) {
+                d[0] = s[0] + s[1];
+                d[1] = s[2] + s[3];
+                d[2] = s[4] + s[5];
+                d[3] = s[6] + s[7];
+                s += 8;
+                d += 4;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = s[0] + s[1];
+                s += 2;
+                d += 1;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d = (int16_t *) dst, *s = (int16_t *) src;
+
+            for (i = n >> 2; i > 0; i--) {
+                *d++ += s[0] + s[1];
+                *d++ += s[2] + s[3];
+                *d++ += s[4] + s[5];
+                *d++ += s[6] + s[7];
+                s += 8;
+            }
+            for (i = n & 3; i; i--) {
+                *d++ += s[0] + s[1];
+                s += 2;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+
 #if defined(__arm__)
 
 #include "arm_neon.h"
@@ -153,17 +288,66 @@
     }
 }
 
+/* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */
+static __attribute__ ((noinline)) void stereo_to_mono_float(float *d, const float *s, unsigned n) {
+    unsigned i;
+
+    for (i = 0; i < n/4; i++) {
+        float32x4x2_t stereo = vld2q_f32(s);
+        float32x4_t mono = vaddq_f32(stereo.val[0], stereo.val[1]);
+        vst1q_f32(d, mono);
+        s += 8;
+        d += 4;
+    }
+    for (i = n & ~3; i < n; i++) {
+        d[0] = s[0] + s[1];
+        s += 2;
+        d++;
+    }
+}
+
+/* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */
+static __attribute__ ((noinline)) void stereo_to_mono_int16(int16_t *d, const int16_t *s, unsigned n) {
+    unsigned int i;
+
+    for (i = 0; i < n/8; i++) {
+        int16x8x2_t stereo = vld2q_s16(s);
+        int16x8_t mono = vaddq_s16(stereo.val[0], stereo.val[1]);
+        vst1q_s16(d, mono);
+        s += 16;
+        d += 8;
+    }
+    for (i = n & ~7; i < n; i++) {
+        d[0] = s[0] + s[1];
+        s += 2;
+        d++;
+    }
+}
+
+static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+            stereo_to_mono_float(dst, src, n);
+            break;
+        case PA_SAMPLE_S16NE:
+            stereo_to_mono_int16(dst, src, n);
+            break;
+        default:
+            pa_assert_not_reached();
+    }
+}
 #define SAMPLES 1019
 #define TIMES 10000
 
-static void run_test_float(void) {
+static void run_test_mono_to_stereo_float(void) {
     float stereo[2*SAMPLES];
     float stereo_ref[2*SAMPLES];
+    float stereo_gen[2*SAMPLES];
     float mono[SAMPLES];
     int i;
     pa_usec_t start, stop;
-    pa_remap_func_t func;
     pa_sample_format_t sf;
+    pa_sample_spec iss, oss;
     pa_remap_t remap;
 
     pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);
@@ -177,8 +361,17 @@
 
     sf = PA_SAMPLE_FLOAT32NE;
     remap.format = &sf;
-    func = (pa_remap_func_t) remap_mono_to_stereo_c;
-    func(&remap, stereo_ref, mono, SAMPLES);
+    iss.format = PA_SAMPLE_FLOAT32NE;
+    iss.channels = 1;
+    oss.format = PA_SAMPLE_FLOAT32NE;
+    oss.channels = 2;
+    remap.i_ss = &iss;
+    remap.o_ss = &oss;
+    remap.map_table_f[0][0] = 1.0;
+    remap.map_table_f[1][0] = 1.0;
+
+    remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+    remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
     remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
 
     for (i = 0; i < 2*SAMPLES; i++) {
@@ -187,6 +380,12 @@
                       mono[i/2]);
         }
     }
+    for (i = 0; i < 2*SAMPLES; i++) {
+        if (fabsf(stereo[i] - stereo_gen[i]) > 0.00001) {
+            pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_gen[i],
+                      mono[i/2]);
+        }
+    }
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
@@ -197,20 +396,92 @@
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        func(&remap, stereo_ref, mono, SAMPLES);
+        remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
 }
 
-static void run_test_s16(void) {
+static void run_test_stereo_to_mono_float(void) {
+    float stereo[2*SAMPLES];
+    float mono_ref[SAMPLES];
+    float mono_gen[SAMPLES];
+    float mono[SAMPLES];
+    int i;
+    pa_usec_t start, stop;
+    pa_sample_format_t sf;
+    pa_sample_spec iss, oss;
+    pa_remap_t remap;
+
+    pa_log_debug("checking NEON remap_stereo_to_mono(float, %d)", SAMPLES);
+
+    memset(mono_ref, 0, sizeof(mono_ref));
+    memset(mono, 0, sizeof(mono));
+
+    for (i = 0; i < 2*SAMPLES; i++) {
+        stereo[i] = rand()/(float) RAND_MAX - 0.5f;
+    }
+
+    sf = PA_SAMPLE_FLOAT32NE;
+    remap.format = &sf;
+    iss.format = PA_SAMPLE_FLOAT32NE;
+    iss.channels = 2;
+    oss.format = PA_SAMPLE_FLOAT32NE;
+    oss.channels = 1;
+    remap.i_ss = &iss;
+    remap.o_ss = &oss;
+    remap.map_table_f[0][0] = 1.0;
+    remap.map_table_f[0][1] = 1.0;
+
+    remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+    remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+    remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+
+    for (i = 0; i < SAMPLES; i++) {
+        if (fabsf(mono[i] - mono_ref[i]) > 0.00001) {
+            pa_log_debug("%d: %.3f != %.3f (%.3f %0.3f)", i, mono[i], mono_ref[i],
+                      stereo[2*i+0], stereo[2*i+1]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
+}
+
+static void run_test_mono_to_stereo_s16(void) {
     int16_t stereo[2*SAMPLES];
     int16_t stereo_ref[2*SAMPLES];
+    int16_t stereo_gen[2*SAMPLES];
     int16_t mono[SAMPLES];
     int i;
     pa_usec_t start, stop;
-    pa_remap_func_t func;
     pa_sample_format_t sf;
+    pa_sample_spec iss, oss;
     pa_remap_t remap;
 
     pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);
@@ -224,10 +495,19 @@
 
     sf = PA_SAMPLE_S16NE;
     remap.format = &sf;
-    func = (pa_remap_func_t) remap_mono_to_stereo_c;
-    func(&remap, stereo_ref, mono, SAMPLES);
+    iss.format = PA_SAMPLE_S16NE;
+    iss.channels = 1;
+    oss.format = PA_SAMPLE_S16NE;
+    oss.channels = 2;
+    remap.i_ss = &iss;
+    remap.o_ss = &oss;
+    remap.map_table_f[0][0] = 1.0;
+    remap.map_table_f[1][0] = 1.0;
+    
+    remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+    remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
     remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
-
+    
     for (i = 0; i < 2*SAMPLES; i++) {
         if (abs(stereo[i] - stereo_ref[i]) > 0) {
             pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i],
@@ -235,6 +515,13 @@
         }
     }
 
+    for (i = 0; i < 2*SAMPLES; i++) {
+        if (abs(stereo[i] - stereo_gen[i]) > 0) {
+            pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_gen[i],
+                      mono[i/2]);
+        }
+    }
+
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
@@ -244,18 +531,100 @@
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        func(&remap, stereo_ref, mono, SAMPLES);
+        remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
 }
 
+static void run_test_stereo_to_mono_s16(void) {
+    int16_t stereo[2*SAMPLES];
+    int16_t mono_ref[SAMPLES];
+    int16_t mono_gen[SAMPLES];
+    int16_t mono[SAMPLES];
+    int i;
+    pa_usec_t start, stop;
+    pa_sample_format_t sf;
+    pa_sample_spec iss, oss;
+    pa_remap_t remap;
+
+    pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES);
+
+    memset(mono_ref, 0, sizeof(mono_ref));
+    memset(mono, 0, sizeof(mono));
+
+    for (i = 0; i < 2*SAMPLES; i++) {
+        stereo[i] = rand() - RAND_MAX/2;
+    }
+
+    sf = PA_SAMPLE_S16NE;
+    remap.format = &sf;
+    iss.format = PA_SAMPLE_S16NE;
+    iss.channels = 2;
+    oss.format = PA_SAMPLE_S16NE;
+    oss.channels = 1;
+    remap.i_ss = &iss;
+    remap.o_ss = &oss;
+    remap.map_table_f[0][0] = 1.0;
+    remap.map_table_f[0][1] = 1.0;
+    
+    remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+    remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+    remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+
+    for (i = 0; i < SAMPLES; i++) {
+        if (abs(mono[i] - mono_ref[i]) > 0) {
+            pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_ref[i],
+                      stereo[2*i+0], stereo[2*i+1]);
+        }
+    }
+    for (i = 0; i < SAMPLES; i++) {
+        if (abs(mono[i] - mono_gen[i]) > 0) {
+            pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_gen[i],
+                      stereo[2*i+0], stereo[2*i+1]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start));
+}
+
+
 #endif /* defined(__arm__) */
 
 int main() {
 
-    run_test_float();
-    run_test_s16();
+    run_test_stereo_to_mono_float();
+    run_test_stereo_to_mono_s16();
+
+    run_test_mono_to_stereo_float();
+    run_test_mono_to_stereo_s16();
+
 
     return EXIT_SUCCESS;
 }
--- a/sconv_neon.c	Thu Jan 12 17:27:46 2012 +0100
+++ b/sconv_neon.c	Fri Apr 20 14:26:14 2012 +0200
@@ -103,7 +103,7 @@
 }
 
 #define SAMPLES 1019
-#define TIMES 300
+#define TIMES 10000
 
 static void run_test_from(void) {
     int16_t samples[SAMPLES];
--- a/svolume_neon.c	Thu Jan 12 17:27:46 2012 +0100
+++ b/svolume_neon.c	Fri Apr 20 14:26:14 2012 +0200
@@ -333,7 +333,7 @@
 }
 
 #define SAMPLES 1019
-#define TIMES 1000
+#define TIMES 3000
 #define CHANNELS 4
 #define PADDING 16
 

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.