diff remap_neon.c @ 0:e0040ee59c3c

import
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Thu, 12 Jan 2012 17:27:46 +0100
parents
children b829afbea564
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/remap_neon.c	Thu Jan 12 17:27:46 2012 +0100
@@ -0,0 +1,261 @@
+/*
+ * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <math.h>
+#include <sys/time.h>
+#include <assert.h>
+
+
+typedef short int16_t;
+typedef enum pa_sample_format {
+    PA_SAMPLE_S16LE,
+    PA_SAMPLE_FLOAT32LE,
+} pa_sample_format_t;
+#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
+#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE
+typedef struct {
+    pa_sample_format_t *format;
+} pa_remap_t;
+typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);
+typedef long long unsigned int pa_usec_t;
+
+#define pa_assert(x) assert(x)
+#define pa_assert_not_reached() assert(0)
+
+#define PA_CLAMP_UNLIKELY(x, low, high) \
+    (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x)))
+
+static void pa_log_info(const char *format, ...)  {
+    va_list ap;
+    char buf[1024];
+    va_start(ap, format);
+    vsprintf(buf, format, ap);
+    printf("%s\n", buf);
+    va_end(ap);
+}
+
+#define pa_log_debug pa_log_info
+
+static pa_usec_t pa_rtclock_now() {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+
+    return tv.tv_sec * 1000000ULL + tv.tv_usec;
+}
+
+static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d, *s;
+
+            d = (float *) dst;
+            s = (float *) src;
+
+            for (i = n >> 2; i; i--) {
+                d[0] = d[1] = s[0];
+                d[2] = d[3] = s[1];
+                d[4] = d[5] = s[2];
+                d[6] = d[7] = s[3];
+                s += 4;
+                d += 8;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = d[1] = s[0];
+                s++;
+                d += 2;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d, *s;
+
+            d = (int16_t *) dst;
+            s = (int16_t *) src;
+
+            for (i = n >> 2; i; i--) {
+                d[0] = d[1] = s[0];
+                d[2] = d[3] = s[1];
+                d[4] = d[5] = s[2];
+                d[6] = d[7] = s[3];
+                s += 4;
+                d += 8;
+            }
+            for (i = n & 3; i; i--) {
+                d[0] = d[1] = s[0];
+                s++;
+                d += 2;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+#if defined(__arm__)
+
+#include "arm_neon.h"
+
+void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned i;
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            float *d = (float *) dst, *s = (float *) src;
+            
+            for (i = 0; i < n/4; i++) {
+                float32x4x2_t stereo; 
+                stereo.val[0] = vld1q_f32(s);
+                stereo.val[1] = stereo.val[0];
+                vst2q_f32(d, stereo);
+                s += 4;
+                d += 8;
+            }
+            
+            for (i = n & ~3; i < n; i++) {
+                d[0] = d[1] = s[0];
+                s++;
+                d += 2;
+            }
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            int16_t *d = (int16_t *) dst, *s = (int16_t *) src;
+
+            for (i = 0; i < n/8; i++) {
+                int16x8x2_t stereo; 
+                stereo.val[0] = vld1q_s16(s);
+                stereo.val[1] = stereo.val[0];
+                vst2q_s16(d, stereo);
+                s += 8;
+                d += 16;
+            }
+
+            for (i = n & ~7; i < n; i++) {
+                d[0] = d[1] = s[0];
+                s++;
+                d += 2;
+            }
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+#define SAMPLES 1019
+#define TIMES 10000
+
+static void run_test_float(void) {
+    float stereo[2*SAMPLES];
+    float stereo_ref[2*SAMPLES];
+    float mono[SAMPLES];
+    int i;
+    pa_usec_t start, stop;
+    pa_remap_func_t func;
+    pa_sample_format_t sf;
+    pa_remap_t remap;
+
+    pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);
+
+    memset(stereo_ref, 0, sizeof(stereo_ref));
+    memset(stereo, 0, sizeof(stereo));
+
+    for (i = 0; i < SAMPLES; i++) {
+        mono[i] = rand()/(float) RAND_MAX - 0.5f;
+    }
+
+    sf = PA_SAMPLE_FLOAT32NE;
+    remap.format = &sf;
+    func = (pa_remap_func_t) remap_mono_to_stereo_c;
+    func(&remap, stereo_ref, mono, SAMPLES);
+    remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
+
+    for (i = 0; i < 2*SAMPLES; i++) {
+        if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) {
+            pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i],
+                      mono[i/2]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        func(&remap, stereo_ref, mono, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+}
+
+static void run_test_s16(void) {
+    int16_t stereo[2*SAMPLES];
+    int16_t stereo_ref[2*SAMPLES];
+    int16_t mono[SAMPLES];
+    int i;
+    pa_usec_t start, stop;
+    pa_remap_func_t func;
+    pa_sample_format_t sf;
+    pa_remap_t remap;
+
+    pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);
+
+    memset(stereo_ref, 0, sizeof(stereo_ref));
+    memset(stereo, 0, sizeof(stereo));
+
+    for (i = 0; i < SAMPLES; i++) {
+        mono[i] = rand() - RAND_MAX/2;
+    }
+
+    sf = PA_SAMPLE_S16NE;
+    remap.format = &sf;
+    func = (pa_remap_func_t) remap_mono_to_stereo_c;
+    func(&remap, stereo_ref, mono, SAMPLES);
+    remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
+
+    for (i = 0; i < 2*SAMPLES; i++) {
+        if (abs(stereo[i] - stereo_ref[i]) > 0) {
+            pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i],
+                      mono[i/2]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        func(&remap, stereo_ref, mono, SAMPLES);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+}
+
+#endif /* defined(__arm__) */
+
+int main() {
+
+    run_test_float();
+    run_test_s16();
+
+    return EXIT_SUCCESS;
+}

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.