comparison remap_neon.c @ 0:e0040ee59c3c

import
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Thu, 12 Jan 2012 17:27:46 +0100
parents
children b829afbea564
comparison
equal deleted inserted replaced
-1:000000000000 0:e0040ee59c3c
1 /*
2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com>
3 */
4
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <stdarg.h>
8 #include <string.h>
9 #include <math.h>
10 #include <sys/time.h>
11 #include <assert.h>
12
13
/*
 * Local type and macro definitions used by the remap routines and their
 * tests below, so that this file builds on its own.
 */

/* 16 bit signed sample type. */
typedef short int16_t;

/* Sample formats handled by the remap functions in this file. */
typedef enum pa_sample_format {
    PA_SAMPLE_S16LE,
    PA_SAMPLE_FLOAT32LE,
} pa_sample_format_t;

/* The "native endian" names are simply aliases of the little-endian ones. */
#define PA_SAMPLE_S16NE PA_SAMPLE_S16LE
#define PA_SAMPLE_FLOAT32NE PA_SAMPLE_FLOAT32LE

/* Remap state: only a pointer to the sample format to operate on. */
typedef struct {
    pa_sample_format_t *format;
} pa_remap_t;

/* Signature shared by all remap implementations (n = number of input samples). */
typedef void (*pa_remap_func_t)(pa_remap_t *m, void *dst, const void *src, unsigned n);

/* Time value in microseconds. */
typedef long long unsigned int pa_usec_t;

/* Assertion helpers mapped onto the standard assert(). */
#define pa_assert(x) assert(x)
#define pa_assert_not_reached() assert(0)

/*
 * Clamp x into [low, high]. The arguments may be evaluated more than once,
 * so do not pass expressions with side effects.
 */
#define PA_CLAMP_UNLIKELY(x, low, high) \
    (((x) < (low)) ? (low) : (((x) > (high)) ? (high) : (x)))
32
/*
 * Print a printf-style formatted message to stdout, followed by a newline.
 *
 * The message is written straight to the stream with vprintf() instead of
 * being assembled in a fixed-size stack buffer first, so a long message can
 * neither overflow a buffer nor get truncated.
 *
 * format: printf-style format string
 * ...:    arguments consumed by the format string
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}
41
42 #define pa_log_debug pa_log_info
43
44 static pa_usec_t pa_rtclock_now() {
45 struct timeval tv;
46 gettimeofday(&tv, NULL);
47
48 return tv.tv_sec * 1000000ULL + tv.tv_usec;
49 }
50
51 static void remap_mono_to_stereo_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
52 unsigned i;
53
54 switch (*m->format) {
55 case PA_SAMPLE_FLOAT32NE:
56 {
57 float *d, *s;
58
59 d = (float *) dst;
60 s = (float *) src;
61
62 for (i = n >> 2; i; i--) {
63 d[0] = d[1] = s[0];
64 d[2] = d[3] = s[1];
65 d[4] = d[5] = s[2];
66 d[6] = d[7] = s[3];
67 s += 4;
68 d += 8;
69 }
70 for (i = n & 3; i; i--) {
71 d[0] = d[1] = s[0];
72 s++;
73 d += 2;
74 }
75 break;
76 }
77 case PA_SAMPLE_S16NE:
78 {
79 int16_t *d, *s;
80
81 d = (int16_t *) dst;
82 s = (int16_t *) src;
83
84 for (i = n >> 2; i; i--) {
85 d[0] = d[1] = s[0];
86 d[2] = d[3] = s[1];
87 d[4] = d[5] = s[2];
88 d[6] = d[7] = s[3];
89 s += 4;
90 d += 8;
91 }
92 for (i = n & 3; i; i--) {
93 d[0] = d[1] = s[0];
94 s++;
95 d += 2;
96 }
97 break;
98 }
99 default:
100 pa_assert_not_reached();
101 }
102 }
103
104 #if defined(__arm__)
105
106 #include "arm_neon.h"
107
108 void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
109 unsigned i;
110 switch (*m->format) {
111 case PA_SAMPLE_FLOAT32NE:
112 {
113 float *d = (float *) dst, *s = (float *) src;
114
115 for (i = 0; i < n/4; i++) {
116 float32x4x2_t stereo;
117 stereo.val[0] = vld1q_f32(s);
118 stereo.val[1] = stereo.val[0];
119 vst2q_f32(d, stereo);
120 s += 4;
121 d += 8;
122 }
123
124 for (i = n & ~3; i < n; i++) {
125 d[0] = d[1] = s[0];
126 s++;
127 d += 2;
128 }
129 break;
130 }
131 case PA_SAMPLE_S16NE:
132 {
133 int16_t *d = (int16_t *) dst, *s = (int16_t *) src;
134
135 for (i = 0; i < n/8; i++) {
136 int16x8x2_t stereo;
137 stereo.val[0] = vld1q_s16(s);
138 stereo.val[1] = stereo.val[0];
139 vst2q_s16(d, stereo);
140 s += 8;
141 d += 16;
142 }
143
144 for (i = n & ~7; i < n; i++) {
145 d[0] = d[1] = s[0];
146 s++;
147 d += 2;
148 }
149 break;
150 }
151 default:
152 pa_assert_not_reached();
153 }
154 }
155
156 #define SAMPLES 1019
157 #define TIMES 10000
158
159 static void run_test_float(void) {
160 float stereo[2*SAMPLES];
161 float stereo_ref[2*SAMPLES];
162 float mono[SAMPLES];
163 int i;
164 pa_usec_t start, stop;
165 pa_remap_func_t func;
166 pa_sample_format_t sf;
167 pa_remap_t remap;
168
169 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);
170
171 memset(stereo_ref, 0, sizeof(stereo_ref));
172 memset(stereo, 0, sizeof(stereo));
173
174 for (i = 0; i < SAMPLES; i++) {
175 mono[i] = rand()/(float) RAND_MAX - 0.5f;
176 }
177
178 sf = PA_SAMPLE_FLOAT32NE;
179 remap.format = &sf;
180 func = (pa_remap_func_t) remap_mono_to_stereo_c;
181 func(&remap, stereo_ref, mono, SAMPLES);
182 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
183
184 for (i = 0; i < 2*SAMPLES; i++) {
185 if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) {
186 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i],
187 mono[i/2]);
188 }
189 }
190
191 start = pa_rtclock_now();
192 for (i = 0; i < TIMES; i++) {
193 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
194 }
195 stop = pa_rtclock_now();
196 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
197
198 start = pa_rtclock_now();
199 for (i = 0; i < TIMES; i++) {
200 func(&remap, stereo_ref, mono, SAMPLES);
201 }
202 stop = pa_rtclock_now();
203 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
204 }
205
206 static void run_test_s16(void) {
207 int16_t stereo[2*SAMPLES];
208 int16_t stereo_ref[2*SAMPLES];
209 int16_t mono[SAMPLES];
210 int i;
211 pa_usec_t start, stop;
212 pa_remap_func_t func;
213 pa_sample_format_t sf;
214 pa_remap_t remap;
215
216 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);
217
218 memset(stereo_ref, 0, sizeof(stereo_ref));
219 memset(stereo, 0, sizeof(stereo));
220
221 for (i = 0; i < SAMPLES; i++) {
222 mono[i] = rand() - RAND_MAX/2;
223 }
224
225 sf = PA_SAMPLE_S16NE;
226 remap.format = &sf;
227 func = (pa_remap_func_t) remap_mono_to_stereo_c;
228 func(&remap, stereo_ref, mono, SAMPLES);
229 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
230
231 for (i = 0; i < 2*SAMPLES; i++) {
232 if (abs(stereo[i] - stereo_ref[i]) > 0) {
233 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i],
234 mono[i/2]);
235 }
236 }
237
238 start = pa_rtclock_now();
239 for (i = 0; i < TIMES; i++) {
240 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
241 }
242 stop = pa_rtclock_now();
243 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
244
245 start = pa_rtclock_now();
246 for (i = 0; i < TIMES; i++) {
247 func(&remap, stereo_ref, mono, SAMPLES);
248 }
249 stop = pa_rtclock_now();
250 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
251 }
252
253 #endif /* defined(__arm__) */
254
/*
 * Entry point: run the float and 16 bit remap tests.
 *
 * The test functions are only compiled when __arm__ is defined, so the calls
 * are guarded by the same condition; on any other target the program builds
 * and simply reports that the tests were skipped.
 */
int main(void) {
#if defined(__arm__)
    run_test_float();
    run_test_s16();
#else
    printf("remap_neon: tests skipped (__arm__ not defined)\n");
#endif

    return EXIT_SUCCESS;
}

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.