/*
 * Repository web-view header (not part of the C source):
 *
 * view remap_neon.c @ 0:e0040ee59c3c
 *
 * import
 * author Peter Meerwald <p.meerwald@bct-electronic.com>
 * date Thu, 12 Jan 2012 17:27:46 +0100
 * parents
 * children b829afbea564
 * line wrap: on
 * line source
 */

/*
 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com>
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include <math.h>
#include <sys/time.h>
#include <assert.h>


/*
 * Local type and macro declarations used by the remap routines and the
 * test harness in this file.
 */

/* 16-bit sample type. NOTE(review): assumes short is 16 bits wide -- confirm
 * for the target; <stdint.h> would provide this typedef portably. */
typedef short int16_t;

/* Sample formats the remap routines in this file switch on. */
typedef enum pa_sample_format {
    PA_SAMPLE_S16LE,
    PA_SAMPLE_FLOAT32LE,
} pa_sample_format_t;

/* The "NE" names are plain aliases of the little-endian constants above
 * (presumably "native endian" -- this file hard-wires them to LE). */
#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE

/* Remap context: the remap functions dereference `format` to pick the
 * per-format conversion path. */
typedef struct {
    pa_sample_format_t *format;
} pa_remap_t;

/* Signature shared by the remap routines: context, destination buffer,
 * source buffer and a count n (the routines in this file read n source
 * samples and write 2*n destination samples). */
typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);

/* Time value as returned by pa_rtclock_now(), which computes microseconds. */
typedef long long unsigned int pa_usec_t;

/* Assertion helpers; both forward to the standard assert(), so they
 * compile to nothing when NDEBUG is defined. */
#define pa_assert(x) assert(x)
#define pa_assert_not_reached() assert(0)

/* Clamps x to the range [low, high]. Each argument may be evaluated more
 * than once, so do not pass expressions with side effects. */
#define PA_CLAMP_UNLIKELY(x, low, high) \
    (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x)))

/*
 * Logging helper: formats the arguments printf-style and writes the
 * resulting message to stdout, followed by a newline.
 *
 * format: printf-style format string.
 * ...:    arguments consumed by the conversions in format.
 *
 * The message is streamed directly with vprintf() instead of being staged
 * in a fixed-size stack buffer, so a long message cannot overflow anything.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    /* Terminate the log line. */
    putchar('\n');
}

/* Debug-level messages use the same output routine as info-level ones. */
#define pa_log_debug pa_log_info

/*
 * Returns the current time as reported by gettimeofday(), expressed in
 * microseconds (seconds * 1000000 + microseconds).
 */
static pa_usec_t pa_rtclock_now() {
    struct timeval now;
    pa_usec_t usec;

    gettimeofday(&now, NULL);

    usec = now.tv_sec * 1000000ULL;
    usec += now.tv_usec;

    return usec;
}

static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
    unsigned i;

    switch (*m->format) {
        case PA_SAMPLE_FLOAT32NE:
        {
            float *d, *s;

            d = (float *) dst;
            s = (float *) src;

            for (i = n >> 2; i; i--) {
                d[0] = d[1] = s[0];
                d[2] = d[3] = s[1];
                d[4] = d[5] = s[2];
                d[6] = d[7] = s[3];
                s += 4;
                d += 8;
            }
            for (i = n & 3; i; i--) {
                d[0] = d[1] = s[0];
                s++;
                d += 2;
            }
            break;
        }
        case PA_SAMPLE_S16NE:
        {
            int16_t *d, *s;

            d = (int16_t *) dst;
            s = (int16_t *) src;

            for (i = n >> 2; i; i--) {
                d[0] = d[1] = s[0];
                d[2] = d[3] = s[1];
                d[4] = d[5] = s[2];
                d[6] = d[7] = s[3];
                s += 4;
                d += 8;
            }
            for (i = n & 3; i; i--) {
                d[0] = d[1] = s[0];
                s++;
                d += 2;
            }
            break;
        }
        default:
            pa_assert_not_reached();
    }
}

#if defined(__arm__)

#include "arm_neon.h"

/*
 * NEON mono-to-stereo remap: copies each of the n samples in src into two
 * consecutive samples in dst.
 *
 * m:   remap context; *m->format selects the float or 16-bit path.
 * dst: output buffer, written with 2*n samples.
 * src: input buffer, n samples are read.
 * n:   number of input samples.
 *
 * Whole vectors are loaded once and stored with a two-way interleaving
 * store whose two lanes hold the same data; samples that do not fill a
 * whole vector are handled by a scalar loop. Any other format value ends
 * in pa_assert_not_reached().
 */
void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
    unsigned left = n;

    switch (*m->format) {
        case PA_SAMPLE_FLOAT32NE:
        {
            const float *in = src;
            float *out = dst;

            /* Vector part: four floats per iteration. */
            while (left >= 4) {
                float32x4x2_t pair;

                pair.val[0] = vld1q_f32(in);
                pair.val[1] = pair.val[0];
                vst2q_f32(out, pair);

                in += 4;
                out += 8;
                left -= 4;
            }

            /* Scalar tail: at most three samples. */
            while (left > 0) {
                out[0] = out[1] = in[0];
                in++;
                out += 2;
                left--;
            }
            break;
        }
        case PA_SAMPLE_S16NE:
        {
            const int16_t *in = src;
            int16_t *out = dst;

            /* Vector part: eight 16-bit samples per iteration. */
            while (left >= 8) {
                int16x8x2_t pair;

                pair.val[0] = vld1q_s16(in);
                pair.val[1] = pair.val[0];
                vst2q_s16(out, pair);

                in += 8;
                out += 16;
                left -= 8;
            }

            /* Scalar tail: at most seven samples. */
            while (left > 0) {
                out[0] = out[1] = in[0];
                in++;
                out += 2;
                left--;
            }
            break;
        }
        default:
            pa_assert_not_reached();
    }
}

/* Number of mono input samples per test buffer. 1019 is not a multiple of
 * 4 or 8, so the scalar tail loops of the remap routines run as well. */
#define SAMPLES 1019
/* Number of repetitions in each timed loop of the test functions. */
#define TIMES 10000

static void run_test_float(void) {
    float stereo[2*SAMPLES];
    float stereo_ref[2*SAMPLES];
    float mono[SAMPLES];
    int i;
    pa_usec_t start, stop;
    pa_remap_func_t func;
    pa_sample_format_t sf;
    pa_remap_t remap;

    pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);

    memset(stereo_ref, 0, sizeof(stereo_ref));
    memset(stereo, 0, sizeof(stereo));

    for (i = 0; i < SAMPLES; i++) {
        mono[i] = rand()/(float) RAND_MAX - 0.5f;
    }

    sf = PA_SAMPLE_FLOAT32NE;
    remap.format = &sf;
    func = (pa_remap_func_t) remap_mono_to_stereo_c;
    func(&remap, stereo_ref, mono, SAMPLES);
    remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);

    for (i = 0; i < 2*SAMPLES; i++) {
        if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) {
            pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i],
                      mono[i/2]);
        }
    }

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        func(&remap, stereo_ref, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}

static void run_test_s16(void) {
    int16_t stereo[2*SAMPLES];
    int16_t stereo_ref[2*SAMPLES];
    int16_t mono[SAMPLES];
    int i;
    pa_usec_t start, stop;
    pa_remap_func_t func;
    pa_sample_format_t sf;
    pa_remap_t remap;

    pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);

    memset(stereo_ref, 0, sizeof(stereo_ref));
    memset(stereo, 0, sizeof(stereo));

    for (i = 0; i < SAMPLES; i++) {
        mono[i] = rand() - RAND_MAX/2;
    }

    sf = PA_SAMPLE_S16NE;
    remap.format = &sf;
    func = (pa_remap_func_t) remap_mono_to_stereo_c;
    func(&remap, stereo_ref, mono, SAMPLES);
    remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);

    for (i = 0; i < 2*SAMPLES; i++) {
        if (abs(stereo[i] - stereo_ref[i]) > 0) {
            pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i],
                      mono[i/2]);
        }
    }

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));

    start = pa_rtclock_now();
    for (i = 0; i < TIMES; i++) {
        func(&remap, stereo_ref, mono, SAMPLES);
    }
    stop = pa_rtclock_now();
    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
}

#endif /* defined(__arm__) */

/*
 * Program entry point: runs the float and 16-bit remap tests.
 *
 * The test functions are only compiled when __arm__ is defined, so the
 * calls are guarded by the same condition; on any other target the program
 * still builds and just reports that the tests were skipped.
 *
 * Returns EXIT_SUCCESS in both cases.
 */
int main(void) {
#if defined(__arm__)
    run_test_float();
    run_test_s16();
#else
    puts("NEON remap tests skipped: not built for ARM.");
#endif

    return EXIT_SUCCESS;
}

/* Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net. */