Mercurial > hg > pa-neon
comparison remap_neon.c @ 0:e0040ee59c3c
import
| author | Peter Meerwald <p.meerwald@bct-electronic.com> | 
|---|---|
| date | Thu, 12 Jan 2012 17:27:46 +0100 | 
| parents | |
| children | b829afbea564 | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:e0040ee59c3c | 
|---|---|
| 1 /* | |
| 2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> | |
| 3 */ | |
| 4 | |
#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/time.h>
| 12 | |
| 13 | |
/*
 * Local stand-in declarations so this file builds on its own.
 * int16_t is taken from <stdint.h> rather than being re-declared here.
 */

/* Sample encodings the remap routines in this file know how to handle. */
typedef enum pa_sample_format {
    PA_SAMPLE_S16LE,
    PA_SAMPLE_FLOAT32LE,
} pa_sample_format_t;

/* "NE" aliases; in this file they map directly onto the LE variants. */
#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE

/* Remap context: only the pointer to the sample format is used here. */
typedef struct {
    pa_sample_format_t *format;
} pa_remap_t;

/* Signature shared by the remap implementations; n is the count passed through to the loops. */
typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);

/* Timestamp / duration in microseconds (see pa_rtclock_now()). */
typedef long long unsigned int pa_usec_t;
| 26 | |
/* Thin wrapper around the standard assert(). */
#define pa_assert(x) assert(x)

/*
 * Marks code that must never execute. assert(0) compiles away under NDEBUG,
 * so abort() follows it to make sure control never continues past this point.
 * Kept as a void expression so it can be used wherever the old form could.
 */
#define pa_assert_not_reached() (assert(0), abort())

/*
 * Clamp x to the closed range [low, high].
 * Note: arguments are evaluated more than once; avoid side effects.
 */
#define PA_CLAMP_UNLIKELY(x, low, high) \
    (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x)))
| 32 | |
/*
 * Format a printf-style message and print it on stdout followed by a newline.
 *
 * The message is built in a fixed 1024-byte buffer. Output longer than the
 * buffer is truncated rather than written past its end.
 */
static void pa_log_info(const char *format, ...) {
    char buf[1024];
    va_list ap;

    /* Guarantee a valid (empty) string even if formatting fails. */
    buf[0] = '\0';

    va_start(ap, format);
    /* Bounded write: vsnprintf never stores more than sizeof(buf) bytes. */
    vsnprintf(buf, sizeof(buf), format, ap);
    va_end(ap);

    printf("%s\n", buf);
}

/* Debug messages are routed to the same function as informational ones. */
#define pa_log_debug pa_log_info
| 43 | |
| 44 static pa_usec_t pa_rtclock_now() { | |
| 45 struct timeval tv; | |
| 46 gettimeofday(&tv, NULL); | |
| 47 | |
| 48 return tv.tv_sec * 1000000ULL + tv.tv_usec; | |
| 49 } | |
| 50 | |
| 51 static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { | |
| 52 unsigned i; | |
| 53 | |
| 54 switch (*m->format) { | |
| 55 case PA_SAMPLE_FLOAT32NE: | |
| 56 { | |
| 57 float *d, *s; | |
| 58 | |
| 59 d = (float *) dst; | |
| 60 s = (float *) src; | |
| 61 | |
| 62 for (i = n >> 2; i; i--) { | |
| 63 d[0] = d[1] = s[0]; | |
| 64 d[2] = d[3] = s[1]; | |
| 65 d[4] = d[5] = s[2]; | |
| 66 d[6] = d[7] = s[3]; | |
| 67 s += 4; | |
| 68 d += 8; | |
| 69 } | |
| 70 for (i = n & 3; i; i--) { | |
| 71 d[0] = d[1] = s[0]; | |
| 72 s++; | |
| 73 d += 2; | |
| 74 } | |
| 75 break; | |
| 76 } | |
| 77 case PA_SAMPLE_S16NE: | |
| 78 { | |
| 79 int16_t *d, *s; | |
| 80 | |
| 81 d = (int16_t *) dst; | |
| 82 s = (int16_t *) src; | |
| 83 | |
| 84 for (i = n >> 2; i; i--) { | |
| 85 d[0] = d[1] = s[0]; | |
| 86 d[2] = d[3] = s[1]; | |
| 87 d[4] = d[5] = s[2]; | |
| 88 d[6] = d[7] = s[3]; | |
| 89 s += 4; | |
| 90 d += 8; | |
| 91 } | |
| 92 for (i = n & 3; i; i--) { | |
| 93 d[0] = d[1] = s[0]; | |
| 94 s++; | |
| 95 d += 2; | |
| 96 } | |
| 97 break; | |
| 98 } | |
| 99 default: | |
| 100 pa_assert_not_reached(); | |
| 101 } | |
| 102 } | |
| 103 | |
| 104 #if defined(__arm__) | |
| 105 | |
| 106 #include "arm_neon.h" | |
| 107 | |
| 108 void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { | |
| 109 unsigned i; | |
| 110 switch (*m->format) { | |
| 111 case PA_SAMPLE_FLOAT32NE: | |
| 112 { | |
| 113 float *d = (float *) dst, *s = (float *) src; | |
| 114 | |
| 115 for (i = 0; i < n/4; i++) { | |
| 116 float32x4x2_t stereo; | |
| 117 stereo.val[0] = vld1q_f32(s); | |
| 118 stereo.val[1] = stereo.val[0]; | |
| 119 vst2q_f32(d, stereo); | |
| 120 s += 4; | |
| 121 d += 8; | |
| 122 } | |
| 123 | |
| 124 for (i = n & ~3; i < n; i++) { | |
| 125 d[0] = d[1] = s[0]; | |
| 126 s++; | |
| 127 d += 2; | |
| 128 } | |
| 129 break; | |
| 130 } | |
| 131 case PA_SAMPLE_S16NE: | |
| 132 { | |
| 133 int16_t *d = (int16_t *) dst, *s = (int16_t *) src; | |
| 134 | |
| 135 for (i = 0; i < n/8; i++) { | |
| 136 int16x8x2_t stereo; | |
| 137 stereo.val[0] = vld1q_s16(s); | |
| 138 stereo.val[1] = stereo.val[0]; | |
| 139 vst2q_s16(d, stereo); | |
| 140 s += 8; | |
| 141 d += 16; | |
| 142 } | |
| 143 | |
| 144 for (i = n & ~7; i < n; i++) { | |
| 145 d[0] = d[1] = s[0]; | |
| 146 s++; | |
| 147 d += 2; | |
| 148 } | |
| 149 break; | |
| 150 } | |
| 151 default: | |
| 152 pa_assert_not_reached(); | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 #define SAMPLES 1019 | |
| 157 #define TIMES 10000 | |
| 158 | |
| 159 static void run_test_float(void) { | |
| 160 float stereo[2*SAMPLES]; | |
| 161 float stereo_ref[2*SAMPLES]; | |
| 162 float mono[SAMPLES]; | |
| 163 int i; | |
| 164 pa_usec_t start, stop; | |
| 165 pa_remap_func_t func; | |
| 166 pa_sample_format_t sf; | |
| 167 pa_remap_t remap; | |
| 168 | |
| 169 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); | |
| 170 | |
| 171 memset(stereo_ref, 0, sizeof(stereo_ref)); | |
| 172 memset(stereo, 0, sizeof(stereo)); | |
| 173 | |
| 174 for (i = 0; i < SAMPLES; i++) { | |
| 175 mono[i] = rand()/(float) RAND_MAX - 0.5f; | |
| 176 } | |
| 177 | |
| 178 sf = PA_SAMPLE_FLOAT32NE; | |
| 179 remap.format = &sf; | |
| 180 func = (pa_remap_func_t) remap_mono_to_stereo_c; | |
| 181 func(&remap, stereo_ref, mono, SAMPLES); | |
| 182 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
| 183 | |
| 184 for (i = 0; i < 2*SAMPLES; i++) { | |
| 185 if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) { | |
| 186 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], | |
| 187 mono[i/2]); | |
| 188 } | |
| 189 } | |
| 190 | |
| 191 start = pa_rtclock_now(); | |
| 192 for (i = 0; i < TIMES; i++) { | |
| 193 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
| 194 } | |
| 195 stop = pa_rtclock_now(); | |
| 196 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 197 | |
| 198 start = pa_rtclock_now(); | |
| 199 for (i = 0; i < TIMES; i++) { | |
| 200 func(&remap, stereo_ref, mono, SAMPLES); | |
| 201 } | |
| 202 stop = pa_rtclock_now(); | |
| 203 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
| 204 } | |
| 205 | |
| 206 static void run_test_s16(void) { | |
| 207 int16_t stereo[2*SAMPLES]; | |
| 208 int16_t stereo_ref[2*SAMPLES]; | |
| 209 int16_t mono[SAMPLES]; | |
| 210 int i; | |
| 211 pa_usec_t start, stop; | |
| 212 pa_remap_func_t func; | |
| 213 pa_sample_format_t sf; | |
| 214 pa_remap_t remap; | |
| 215 | |
| 216 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); | |
| 217 | |
| 218 memset(stereo_ref, 0, sizeof(stereo_ref)); | |
| 219 memset(stereo, 0, sizeof(stereo)); | |
| 220 | |
| 221 for (i = 0; i < SAMPLES; i++) { | |
| 222 mono[i] = rand() - RAND_MAX/2; | |
| 223 } | |
| 224 | |
| 225 sf = PA_SAMPLE_S16NE; | |
| 226 remap.format = &sf; | |
| 227 func = (pa_remap_func_t) remap_mono_to_stereo_c; | |
| 228 func(&remap, stereo_ref, mono, SAMPLES); | |
| 229 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
| 230 | |
| 231 for (i = 0; i < 2*SAMPLES; i++) { | |
| 232 if (abs(stereo[i] - stereo_ref[i]) > 0) { | |
| 233 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], | |
| 234 mono[i/2]); | |
| 235 } | |
| 236 } | |
| 237 | |
| 238 start = pa_rtclock_now(); | |
| 239 for (i = 0; i < TIMES; i++) { | |
| 240 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
| 241 } | |
| 242 stop = pa_rtclock_now(); | |
| 243 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
| 244 | |
| 245 start = pa_rtclock_now(); | |
| 246 for (i = 0; i < TIMES; i++) { | |
| 247 func(&remap, stereo_ref, mono, SAMPLES); | |
| 248 } | |
| 249 stop = pa_rtclock_now(); | |
| 250 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
| 251 } | |
| 252 | |
| 253 #endif /* defined(__arm__) */ | |
| 254 | |
/*
 * Entry point: run the float and s16 comparison/benchmark passes.
 *
 * The test functions are only compiled when __arm__ is defined, so the calls
 * are guarded the same way; on other targets a notice is printed instead and
 * the program still exits successfully.
 */
int main(void) {
#if defined(__arm__)
    run_test_float();
    run_test_s16();
#else
    fputs("NEON remap tests skipped: not built for ARM\n", stderr);
#endif

    return EXIT_SUCCESS;
}
