Mercurial > hg > pa-neon
view remap_neon.c @ 0:e0040ee59c3c
import
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Thu, 12 Jan 2012 17:27:46 +0100 |
parents | |
children | b829afbea564 |
line wrap: on
line source
/*
 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com>
 */

/*
 * Standalone check and micro-benchmark for a NEON mono-to-stereo remapper.
 *
 * A plain C reference implementation is compared against a NEON intrinsics
 * implementation for float and signed 16-bit samples; afterwards both are
 * timed. A handful of pa_* names are stubbed locally so the file builds
 * without any other project headers.
 */

#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/* Sample formats handled by the remap functions in this file. */
typedef enum pa_sample_format {
    PA_SAMPLE_S16LE,
    PA_SAMPLE_FLOAT32LE,
} pa_sample_format_t;

/* The "native endian" names are simply mapped onto the little-endian ones. */
#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE

/* Minimal remap context: only the sample format is consulted here. */
typedef struct {
    pa_sample_format_t *format;
} pa_remap_t;

/* Signature shared by all remap implementations; n counts input samples. */
typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);

/* Timestamp in microseconds. */
typedef long long unsigned int pa_usec_t;

#define pa_assert(x) assert(x)
#define pa_assert_not_reached() assert(0)

/* Clamp x into [low, high]. Arguments may be evaluated more than once. */
#define PA_CLAMP_UNLIKELY(x, low, high) \
    (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x)))

/*
 * Print a printf-style message followed by a newline on stdout.
 *
 * The message is formatted into a fixed 1024-byte buffer; vsnprintf bounds
 * the write, so longer messages are truncated instead of overflowing it.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;
    char buf[1024];

    va_start(ap, format);
    vsnprintf(buf, sizeof(buf), format, ap);
    va_end(ap);

    printf("%s\n", buf);
}

#define pa_log_debug pa_log_info

/* Return the current gettimeofday() time in microseconds. */
static pa_usec_t pa_rtclock_now(void) {
    struct timeval tv;

    gettimeofday(&tv, NULL);

    return tv.tv_sec * 1000000ULL + tv.tv_usec;
}

/*
 * Reference implementation: copy each of the n input samples in src into
 * two consecutive output slots of dst (dst must hold 2*n samples).
 *
 * The sample type is chosen by *m->format; any format other than
 * PA_SAMPLE_FLOAT32NE or PA_SAMPLE_S16NE trips an assertion.
 */
static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
    unsigned i;

    switch (*m->format) {
        case PA_SAMPLE_FLOAT32NE: {
            float *d = dst;
            const float *s = src;

            /* Main loop, manually unrolled: four input samples per pass. */
            for (i = n >> 2; i; i--) {
                d[0] = d[1] = s[0];
                d[2] = d[3] = s[1];
                d[4] = d[5] = s[2];
                d[6] = d[7] = s[3];
                s += 4;
                d += 8;
            }

            /* Remaining n % 4 samples. */
            for (i = n & 3; i; i--) {
                d[0] = d[1] = s[0];
                s++;
                d += 2;
            }
            break;
        }

        case PA_SAMPLE_S16NE: {
            int16_t *d = dst;
            const int16_t *s = src;

            /* Main loop, manually unrolled: four input samples per pass. */
            for (i = n >> 2; i; i--) {
                d[0] = d[1] = s[0];
                d[2] = d[3] = s[1];
                d[4] = d[5] = s[2];
                d[6] = d[7] = s[3];
                s += 4;
                d += 8;
            }

            /* Remaining n % 4 samples. */
            for (i = n & 3; i; i--) {
                d[0] = d[1] = s[0];
                s++;
                d += 2;
            }
            break;
        }

        default:
            pa_assert_not_reached();
    }
}

/*
 * NOTE(review): __arm__ alone does not guarantee that NEON is enabled for
 * the build (arm_neon.h may refuse to compile without it) -- consider testing
 * the compiler's NEON feature macro instead; confirm against the toolchain.
 */
#if defined(__arm__)

#include <arm_neon.h>

/*
 * NEON implementation with the same contract as remap_mono_to_stereo_c().
 *
 * A vector of input samples is loaded once, placed in both lanes of a
 * two-register structure and written with an interleaving store, so every
 * sample lands in two adjacent output slots.
 */
void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
    unsigned i;

    switch (*m->format) {
        case PA_SAMPLE_FLOAT32NE: {
            float *d = dst;
            const float *s = src;

            /* Four floats per 128-bit load. */
            for (i = 0; i < n/4; i++) {
                float32x4x2_t stereo;

                stereo.val[0] = vld1q_f32(s);
                stereo.val[1] = stereo.val[0];
                vst2q_f32(d, stereo);
                s += 4;
                d += 8;
            }

            /* Scalar tail for the samples past the last full vector. */
            for (i = n & ~3u; i < n; i++) {
                d[0] = d[1] = s[0];
                s++;
                d += 2;
            }
            break;
        }

        case PA_SAMPLE_S16NE: {
            int16_t *d = dst;
            const int16_t *s = src;

            /* Eight 16-bit samples per 128-bit load. */
            for (i = 0; i < n/8; i++) {
                int16x8x2_t stereo;

                stereo.val[0] = vld1q_s16(s);
                stereo.val[1] = stereo.val[0];
                vst2q_s16(d, stereo);
                s += 8;
                d += 16;
            }

            /* Scalar tail for the samples past the last full vector. */
            for (i = n & ~7u; i < n; i++) {
                d[0] = d[1] = s[0];
                s++;
                d += 2;
            }
            break;
        }

        default:
            pa_assert_not_reached();
    }
}

/* 1019 is not a multiple of 4 or 8, so the scalar tail loops run as well. */
#define SAMPLES 1019
/* Number of calls per implementation in the timing loops. */
#define TIMES 10000

/*
 * Float test: fill a mono buffer with random values in [-0.5, 0.5], log every
 * output slot where the NEON result differs from the C reference, then time
 * TIMES calls of each implementation.
 */
static void run_test_float(void) {
    float stereo[2*SAMPLES];
    float stereo_ref[2*SAMPLES];
    float mono[SAMPLES];
    int i;
    pa_usec_t start, stop;
    pa_remap_func_t func;
    pa_sample_format_t sf;
    pa_remap_t remap;

    pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);

    memset(stereo_ref, 0, sizeof(stereo_ref));
    memset(stereo, 0, sizeof(stereo));

    for (i = 0; i < SAMPLES; i++) {
        mono[i] = rand()/(float) RAND_MAX - 0.5f;
    }

    sf = PA_SAMPLE_FLOAT32NE;
    remap.format = &sf;
    func = remap_mono_to_stereo_c;

    func(&remap, stereo_ref, mono, SAMPLES);
    remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);

    for (i = 0; i < 2*SAMPLES; i++) {
        if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) {
            pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], mono[i/2]);
        }
    }

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        func(&remap, stereo_ref, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}

/*
 * Signed 16-bit test: same procedure as run_test_float(), but results must
 * match the C reference exactly.
 */
static void run_test_s16(void) {
    int16_t stereo[2*SAMPLES];
    int16_t stereo_ref[2*SAMPLES];
    int16_t mono[SAMPLES];
    int i;
    pa_usec_t start, stop;
    pa_remap_func_t func;
    pa_sample_format_t sf;
    pa_remap_t remap;

    pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);

    memset(stereo_ref, 0, sizeof(stereo_ref));
    memset(stereo, 0, sizeof(stereo));

    for (i = 0; i < SAMPLES; i++) {
        /* Only arbitrary bit patterns are needed, so narrowing is intended. */
        mono[i] = (int16_t) (rand() - RAND_MAX/2);
    }

    sf = PA_SAMPLE_S16NE;
    remap.format = &sf;
    func = remap_mono_to_stereo_c;

    func(&remap, stereo_ref, mono, SAMPLES);
    remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);

    for (i = 0; i < 2*SAMPLES; i++) {
        if (stereo[i] != stereo_ref[i]) {
            pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], mono[i/2]);
        }
    }

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        func(&remap, stereo_ref, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}

#endif /* defined(__arm__) */

/*
 * Run both tests when built for ARM. On other targets the test functions do
 * not exist, so the calls are compiled out and a notice is printed instead.
 */
int main(void) {
#if defined(__arm__)
    run_test_float();
    run_test_s16();
#else
    /* Reference the helpers only used by the ARM tests to avoid
     * unused-function warnings on other targets. */
    (void) pa_rtclock_now;
    (void) remap_mono_to_stereo_c;

    pa_log_info("NEON remap test skipped: not built for ARM.");
#endif

    return EXIT_SUCCESS;
}