Mercurial > hg > pa-neon
diff remap_neon.c @ 0:e0040ee59c3c
import
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Thu, 12 Jan 2012 17:27:46 +0100 |
parents | |
children | b829afbea564 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/remap_neon.c Thu Jan 12 17:27:46 2012 +0100 @@ -0,0 +1,261 @@ +/* + * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <math.h> +#include <sys/time.h> +#include <assert.h> + + +typedef short int16_t; +typedef enum pa_sample_format { + PA_SAMPLE_S16LE, + PA_SAMPLE_FLOAT32LE, +} pa_sample_format_t; +#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE +#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE +typedef struct { + pa_sample_format_t *format; +} pa_remap_t; +typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n); +typedef long long unsigned int pa_usec_t; + +#define pa_assert(x) assert(x) +#define pa_assert_not_reached() assert(0) + +#define PA_CLAMP_UNLIKELY(x, low, high) \ + (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x))) + +static void pa_log_info(const char *format, ...) { + va_list ap; + char buf[1024]; + va_start(ap, format); + vsprintf(buf, format, ap); + printf("%s\n", buf); + va_end(ap); +} + +#define pa_log_debug pa_log_info + +static pa_usec_t pa_rtclock_now() { + struct timeval tv; + gettimeofday(&tv, NULL); + + return tv.tv_sec * 1000000ULL + tv.tv_usec; +} + +static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { + unsigned i; + + switch (*m->format) { + case PA_SAMPLE_FLOAT32NE: + { + float *d, *s; + + d = (float *) dst; + s = (float *) src; + + for (i = n >> 2; i; i--) { + d[0] = d[1] = s[0]; + d[2] = d[3] = s[1]; + d[4] = d[5] = s[2]; + d[6] = d[7] = s[3]; + s += 4; + d += 8; + } + for (i = n & 3; i; i--) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } + case PA_SAMPLE_S16NE: + { + int16_t *d, *s; + + d = (int16_t *) dst; + s = (int16_t *) src; + + for (i = n >> 2; i; i--) { + d[0] = d[1] = s[0]; + d[2] = d[3] = s[1]; + d[4] = d[5] = s[2]; + d[6] = d[7] = s[3]; + s += 4; + d += 8; + } + for (i = n & 3; i; i--) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } + default: + pa_assert_not_reached(); + } +} + +#if defined(__arm__) + +#include "arm_neon.h" + +void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { + unsigned i; + switch (*m->format) { + case PA_SAMPLE_FLOAT32NE: + { + float *d = (float *) dst, *s = (float *) src; + + for (i = 0; i < n/4; i++) { + float32x4x2_t stereo; + stereo.val[0] = vld1q_f32(s); + stereo.val[1] = stereo.val[0]; + vst2q_f32(d, stereo); + s += 4; + d += 8; + } + + for (i = n & ~3; i < n; i++) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } + case PA_SAMPLE_S16NE: + { + int16_t *d = (int16_t *) dst, *s = (int16_t *) src; + + for (i = 0; i < n/8; i++) { + int16x8x2_t stereo; + stereo.val[0] = vld1q_s16(s); + stereo.val[1] = stereo.val[0]; + vst2q_s16(d, stereo); + s += 8; + d += 16; + } + + for (i = n & ~7; i < n; i++) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } + default: + pa_assert_not_reached(); + } +} + +#define SAMPLES 1019 +#define TIMES 10000 + +static void run_test_float(void) { + float stereo[2*SAMPLES]; + float stereo_ref[2*SAMPLES]; + float mono[SAMPLES]; + int i; + pa_usec_t start, stop; + pa_remap_func_t func; + pa_sample_format_t sf; + pa_remap_t remap; + + pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); + + memset(stereo_ref, 0, sizeof(stereo_ref)); + memset(stereo, 0, sizeof(stereo)); + + for (i = 0; i < SAMPLES; i++) { + mono[i] = rand()/(float) RAND_MAX - 0.5f; + } + + sf = PA_SAMPLE_FLOAT32NE; + remap.format = &sf; + func = (pa_remap_func_t) remap_mono_to_stereo_c; + func(&remap, stereo_ref, mono, SAMPLES); + remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); + + for (i = 0; i < 2*SAMPLES; i++) { + if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) { + pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], + mono[i/2]); + } + } + + start = pa_rtclock_now(); + for (i = 0; i < TIMES; i++) { + remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); + } + stop = pa_rtclock_now(); + pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); + + start = pa_rtclock_now(); + for (i = 0; i < TIMES; i++) { + func(&remap, stereo_ref, mono, SAMPLES); + } + stop = pa_rtclock_now(); + pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); +} + +static void run_test_s16(void) { + int16_t stereo[2*SAMPLES]; + int16_t stereo_ref[2*SAMPLES]; + int16_t mono[SAMPLES]; + int i; + pa_usec_t start, stop; + pa_remap_func_t func; + pa_sample_format_t sf; + pa_remap_t remap; + + pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); + + memset(stereo_ref, 0, sizeof(stereo_ref)); + memset(stereo, 0, sizeof(stereo)); + + for (i = 0; i < SAMPLES; i++) { + mono[i] = rand() - RAND_MAX/2; + } + + sf = PA_SAMPLE_S16NE; + remap.format = &sf; + func = (pa_remap_func_t) remap_mono_to_stereo_c; + func(&remap, stereo_ref, mono, SAMPLES); + remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); + + for (i = 0; i < 2*SAMPLES; i++) { + if (abs(stereo[i] - stereo_ref[i]) > 0) { + pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], + mono[i/2]); + } + } + + start = pa_rtclock_now(); + for (i = 0; i < TIMES; i++) { + remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); + } + stop = pa_rtclock_now(); + pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); + + start = pa_rtclock_now(); + for (i = 0; i < TIMES; i++) { + func(&remap, stereo_ref, mono, SAMPLES); + } + stop = pa_rtclock_now(); + pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); +} + +#endif /* defined(__arm__) */ + +int main() { + + run_test_float(); + run_test_s16(); + + return EXIT_SUCCESS; +}