Mercurial > hg > pa-neon
comparison remap_neon.c @ 0:e0040ee59c3c
import
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Thu, 12 Jan 2012 17:27:46 +0100 |
parents | |
children | b829afbea564 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0040ee59c3c |
---|---|
1 /* | |
2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> | |
3 */ | |
4 | |
5 #include <stdlib.h> | |
6 #include <stdio.h> | |
7 #include <stdarg.h> | |
8 #include <string.h> | |
9 #include <math.h> | |
10 #include <sys/time.h> | |
11 #include <assert.h> | |
12 | |
13 | |
/*
 * Local declarations used by the remap routines and the test code in
 * this file.
 */

/* Sample type used for the S16 format. */
typedef short int int16_t;

/* Sample formats the remap routines in this file switch on. */
typedef enum pa_sample_format {
    PA_SAMPLE_S16LE,
    PA_SAMPLE_FLOAT32LE,
} pa_sample_format_t;

/* The "NE" names are plain aliases of the "LE" enumerators. */
#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE

/* Remap context; it only carries a pointer to the sample format. */
typedef struct {
    pa_sample_format_t *format;
} pa_remap_t;

/* Common signature of the remap implementations. */
typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);

/* Unsigned type used for microsecond timestamps. */
typedef unsigned long long pa_usec_t;

/* Assertion helpers, mapped directly onto assert(). */
#define pa_assert(x) assert(x)
#define pa_assert_not_reached() assert(0)

/*
 * Clamp v to [lo, hi]. The lower bound is tested first. Arguments may be
 * evaluated more than once, so they must be free of side effects.
 */
#define PA_CLAMP_UNLIKELY(v, lo, hi) \
    ((v) < (lo) ? (lo) : ((v) > (hi) ? (hi) : (v)))
32 | |
/*
 * Print a printf-style formatted message to stdout, followed by a newline.
 *
 * format - printf-style format string
 * ...    - arguments consumed by the format string
 *
 * The message is written straight to the stream with vprintf() instead of
 * being staged in a fixed-size stack buffer, so a long message cannot
 * overflow anything.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}

/* Debug messages go through the same routine as info messages. */
#define pa_log_debug pa_log_info
43 | |
44 static pa_usec_t pa_rtclock_now() { | |
45 struct timeval tv; | |
46 gettimeofday(&tv, NULL); | |
47 | |
48 return tv.tv_sec * 1000000ULL + tv.tv_usec; | |
49 } | |
50 | |
51 static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { | |
52 unsigned i; | |
53 | |
54 switch (*m->format) { | |
55 case PA_SAMPLE_FLOAT32NE: | |
56 { | |
57 float *d, *s; | |
58 | |
59 d = (float *) dst; | |
60 s = (float *) src; | |
61 | |
62 for (i = n >> 2; i; i--) { | |
63 d[0] = d[1] = s[0]; | |
64 d[2] = d[3] = s[1]; | |
65 d[4] = d[5] = s[2]; | |
66 d[6] = d[7] = s[3]; | |
67 s += 4; | |
68 d += 8; | |
69 } | |
70 for (i = n & 3; i; i--) { | |
71 d[0] = d[1] = s[0]; | |
72 s++; | |
73 d += 2; | |
74 } | |
75 break; | |
76 } | |
77 case PA_SAMPLE_S16NE: | |
78 { | |
79 int16_t *d, *s; | |
80 | |
81 d = (int16_t *) dst; | |
82 s = (int16_t *) src; | |
83 | |
84 for (i = n >> 2; i; i--) { | |
85 d[0] = d[1] = s[0]; | |
86 d[2] = d[3] = s[1]; | |
87 d[4] = d[5] = s[2]; | |
88 d[6] = d[7] = s[3]; | |
89 s += 4; | |
90 d += 8; | |
91 } | |
92 for (i = n & 3; i; i--) { | |
93 d[0] = d[1] = s[0]; | |
94 s++; | |
95 d += 2; | |
96 } | |
97 break; | |
98 } | |
99 default: | |
100 pa_assert_not_reached(); | |
101 } | |
102 } | |
103 | |
104 #if defined(__arm__) | |
105 | |
106 #include "arm_neon.h" | |
107 | |
108 void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { | |
109 unsigned i; | |
110 switch (*m->format) { | |
111 case PA_SAMPLE_FLOAT32NE: | |
112 { | |
113 float *d = (float *) dst, *s = (float *) src; | |
114 | |
115 for (i = 0; i < n/4; i++) { | |
116 float32x4x2_t stereo; | |
117 stereo.val[0] = vld1q_f32(s); | |
118 stereo.val[1] = stereo.val[0]; | |
119 vst2q_f32(d, stereo); | |
120 s += 4; | |
121 d += 8; | |
122 } | |
123 | |
124 for (i = n & ~3; i < n; i++) { | |
125 d[0] = d[1] = s[0]; | |
126 s++; | |
127 d += 2; | |
128 } | |
129 break; | |
130 } | |
131 case PA_SAMPLE_S16NE: | |
132 { | |
133 int16_t *d = (int16_t *) dst, *s = (int16_t *) src; | |
134 | |
135 for (i = 0; i < n/8; i++) { | |
136 int16x8x2_t stereo; | |
137 stereo.val[0] = vld1q_s16(s); | |
138 stereo.val[1] = stereo.val[0]; | |
139 vst2q_s16(d, stereo); | |
140 s += 8; | |
141 d += 16; | |
142 } | |
143 | |
144 for (i = n & ~7; i < n; i++) { | |
145 d[0] = d[1] = s[0]; | |
146 s++; | |
147 d += 2; | |
148 } | |
149 break; | |
150 } | |
151 default: | |
152 pa_assert_not_reached(); | |
153 } | |
154 } | |
155 | |
156 #define SAMPLES 1019 | |
157 #define TIMES 10000 | |
158 | |
159 static void run_test_float(void) { | |
160 float stereo[2*SAMPLES]; | |
161 float stereo_ref[2*SAMPLES]; | |
162 float mono[SAMPLES]; | |
163 int i; | |
164 pa_usec_t start, stop; | |
165 pa_remap_func_t func; | |
166 pa_sample_format_t sf; | |
167 pa_remap_t remap; | |
168 | |
169 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); | |
170 | |
171 memset(stereo_ref, 0, sizeof(stereo_ref)); | |
172 memset(stereo, 0, sizeof(stereo)); | |
173 | |
174 for (i = 0; i < SAMPLES; i++) { | |
175 mono[i] = rand()/(float) RAND_MAX - 0.5f; | |
176 } | |
177 | |
178 sf = PA_SAMPLE_FLOAT32NE; | |
179 remap.format = &sf; | |
180 func = (pa_remap_func_t) remap_mono_to_stereo_c; | |
181 func(&remap, stereo_ref, mono, SAMPLES); | |
182 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
183 | |
184 for (i = 0; i < 2*SAMPLES; i++) { | |
185 if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) { | |
186 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], | |
187 mono[i/2]); | |
188 } | |
189 } | |
190 | |
191 start = pa_rtclock_now(); | |
192 for (i = 0; i < TIMES; i++) { | |
193 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
194 } | |
195 stop = pa_rtclock_now(); | |
196 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
197 | |
198 start = pa_rtclock_now(); | |
199 for (i = 0; i < TIMES; i++) { | |
200 func(&remap, stereo_ref, mono, SAMPLES); | |
201 } | |
202 stop = pa_rtclock_now(); | |
203 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
204 } | |
205 | |
206 static void run_test_s16(void) { | |
207 int16_t stereo[2*SAMPLES]; | |
208 int16_t stereo_ref[2*SAMPLES]; | |
209 int16_t mono[SAMPLES]; | |
210 int i; | |
211 pa_usec_t start, stop; | |
212 pa_remap_func_t func; | |
213 pa_sample_format_t sf; | |
214 pa_remap_t remap; | |
215 | |
216 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); | |
217 | |
218 memset(stereo_ref, 0, sizeof(stereo_ref)); | |
219 memset(stereo, 0, sizeof(stereo)); | |
220 | |
221 for (i = 0; i < SAMPLES; i++) { | |
222 mono[i] = rand() - RAND_MAX/2; | |
223 } | |
224 | |
225 sf = PA_SAMPLE_S16NE; | |
226 remap.format = &sf; | |
227 func = (pa_remap_func_t) remap_mono_to_stereo_c; | |
228 func(&remap, stereo_ref, mono, SAMPLES); | |
229 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
230 | |
231 for (i = 0; i < 2*SAMPLES; i++) { | |
232 if (abs(stereo[i] - stereo_ref[i]) > 0) { | |
233 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], | |
234 mono[i/2]); | |
235 } | |
236 } | |
237 | |
238 start = pa_rtclock_now(); | |
239 for (i = 0; i < TIMES; i++) { | |
240 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
241 } | |
242 stop = pa_rtclock_now(); | |
243 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
244 | |
245 start = pa_rtclock_now(); | |
246 for (i = 0; i < TIMES; i++) { | |
247 func(&remap, stereo_ref, mono, SAMPLES); | |
248 } | |
249 stop = pa_rtclock_now(); | |
250 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
251 } | |
252 | |
253 #endif /* defined(__arm__) */ | |
254 | |
/*
 * Program entry point: runs the float and s16 remap checks.
 *
 * run_test_float() and run_test_s16() are only defined when __arm__ is
 * defined, so the calls are guarded by the same condition; on any other
 * target the program just reports that the tests were skipped.
 *
 * Returns EXIT_SUCCESS.
 */
int main(void) {
#if defined(__arm__)
    run_test_float();
    run_test_s16();
#else
    pa_log_info("NEON remap tests skipped: not built for ARM");
#endif

    return EXIT_SUCCESS;
}