Mercurial > hg > pa-neon
comparison remap_neon.c @ 2:09ee6a01a3d3
new
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Wed, 04 Jul 2012 15:24:08 +0200 |
parents | b829afbea564 |
children | e889fd0e7769 |
comparison
equal
deleted
inserted
replaced
1:b829afbea564 | 2:09ee6a01a3d3 |
---|---|
186 default: | 186 default: |
187 pa_assert_not_reached(); | 187 pa_assert_not_reached(); |
188 } | 188 } |
189 } | 189 } |
190 | 190 |
191 | |
192 | |
193 static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { | 191 static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { |
194 unsigned i; | 192 unsigned i; |
195 | 193 |
196 switch (*m->format) { | 194 switch (*m->format) { |
197 case PA_SAMPLE_FLOAT32NE: | 195 case PA_SAMPLE_FLOAT32NE: |
233 default: | 231 default: |
234 pa_assert_not_reached(); | 232 pa_assert_not_reached(); |
235 } | 233 } |
236 } | 234 } |
237 | 235 |
238 | |
239 #if defined(__arm__) | 236 #if defined(__arm__) |
240 | 237 |
241 #include "arm_neon.h" | 238 #include "arm_neon.h" |
242 | 239 |
243 void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { | 240 static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) { |
244 unsigned i; | 241 int i = n & 3; |
242 | |
243 asm volatile ( | |
244 "mov %[n], %[n], lsr #2\n\t" | |
245 "1:\n\t" | |
246 "vld1.32 {q0}, [%[src]]!\n\t" | |
247 "vmov q1, q0\n\t" | |
248 "subs %[n], %[n], #1\n\t" | |
249 "vst2.32 {q0,q1}, [%[dst]]!\n\t" | |
250 "bgt 1b\n\t" | |
251 // output operands (or input operands that get modified) | |
252 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
253 : // input operands | |
254 : "memory", "cc" // clobber list | |
255 ); | |
256 | |
257 while (i--) { | |
258 dst[0] = dst[1] = src[0]; | |
259 src++; | |
260 dst += 2; | |
261 } | |
262 } | |
263 | |
264 static void mono_to_stereo_float_neon_a9(float *dst, const float *src, unsigned n) { | |
265 int i = n & 1; | |
266 | |
267 asm volatile ( | |
268 "mov %[n], %[n], lsr #1\n\t" | |
269 "1:\n\t" | |
270 "ldm %[src]!, {r4,r6}\n\t" | |
271 "mov r5, r4\n\t" | |
272 "mov r7, r6\n\t" | |
273 "subs %[n], %[n], #1\n\t" | |
274 "stm %[dst]!, {r4-r7}\n\t" | |
275 "bgt 1b\n\t" | |
276 // output operands (or input operands that get modified) | |
277 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
278 : // input operands | |
279 : "memory", "cc", "r4", "r5", "r6", "r7" // clobber list | |
280 ); | |
281 | |
282 while (i--) { | |
283 dst[0] = dst[1] = src[0]; | |
284 src++; | |
285 dst += 2; | |
286 } | |
287 } | |
288 | |
289 static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) { | |
290 int i = n & 7; | |
291 | |
292 asm volatile ( | |
293 "mov %[n], %[n], lsr #3\n\t" | |
294 "1:\n\t" | |
295 "vld1.16 {q0}, [%[src]]!\n\t" | |
296 "vmov q1, q0\n\t" | |
297 "subs %[n], %[n], #1\n\t" | |
298 "vst2.16 {q0,q1}, [%[dst]]!\n\t" | |
299 "bgt 1b\n\t" | |
300 // output operands (or input operands that get modified) | |
301 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
302 : // input operands | |
303 : "memory", "cc" // clobber list | |
304 ); | |
305 | |
306 while (i--) { | |
307 dst[0] = dst[1] = src[0]; | |
308 src++; | |
309 dst += 2; | |
310 } | |
311 } | |
312 | |
313 static void remap_mono_to_stereo_neon_a9(pa_remap_t *m, void *dst, const void *src, unsigned n) { | |
245 switch (*m->format) { | 314 switch (*m->format) { |
246 case PA_SAMPLE_FLOAT32NE: | 315 case PA_SAMPLE_FLOAT32NE: |
247 { | 316 mono_to_stereo_float_neon_a9(dst, src, n); |
248 float *d = (float *) dst, *s = (float *) src; | 317 break; |
249 | |
250 for (i = 0; i < n/4; i++) { | |
251 float32x4x2_t stereo; | |
252 stereo.val[0] = vld1q_f32(s); | |
253 stereo.val[1] = stereo.val[0]; | |
254 vst2q_f32(d, stereo); | |
255 s += 4; | |
256 d += 8; | |
257 } | |
258 | |
259 for (i = n & ~3; i < n; i++) { | |
260 d[0] = d[1] = s[0]; | |
261 s++; | |
262 d += 2; | |
263 } | |
264 break; | |
265 } | |
266 case PA_SAMPLE_S16NE: | 318 case PA_SAMPLE_S16NE: |
267 { | 319 mono_to_stereo_int16_neon(dst, src, n); |
268 int16_t *d = (int16_t *) dst, *s = (int16_t *) src; | 320 break; |
269 | |
270 for (i = 0; i < n/8; i++) { | |
271 int16x8x2_t stereo; | |
272 stereo.val[0] = vld1q_s16(s); | |
273 stereo.val[1] = stereo.val[0]; | |
274 vst2q_s16(d, stereo); | |
275 s += 8; | |
276 d += 16; | |
277 } | |
278 | |
279 for (i = n & ~7; i < n; i++) { | |
280 d[0] = d[1] = s[0]; | |
281 s++; | |
282 d += 2; | |
283 } | |
284 break; | |
285 } | |
286 default: | 321 default: |
287 pa_assert_not_reached(); | 322 pa_assert_not_reached(); |
288 } | 323 } |
289 } | 324 } |
290 | 325 |
291 /* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */ | 326 static void remap_mono_to_stereo_neon_a8(pa_remap_t *m, void *dst, const void *src, unsigned n) { |
292 static __attribute__ ((noinline)) void stereo_to_mono_float(float *d, const float *s, unsigned n) { | 327 switch (*m->format) { |
293 unsigned i; | 328 case PA_SAMPLE_FLOAT32NE: |
294 | 329 mono_to_stereo_float_neon_a8(dst, src, n); |
295 for (i = 0; i < n/4; i++) { | 330 break; |
296 float32x4x2_t stereo = vld2q_f32(s); | 331 case PA_SAMPLE_S16NE: |
297 float32x4_t mono = vaddq_f32(stereo.val[0], stereo.val[1]); | 332 mono_to_stereo_int16_neon(dst, src, n); |
298 vst1q_f32(d, mono); | 333 break; |
299 s += 8; | 334 default: |
300 d += 4; | 335 pa_assert_not_reached(); |
301 } | 336 } |
302 for (i = n & ~3; i < n; i++) { | 337 } |
303 d[0] = s[0] + s[1]; | 338 |
304 s += 2; | 339 static void stereo_to_mono_float_neon(float *dst, const float *src, unsigned n) { |
305 d++; | 340 int i = n & 3; |
306 } | 341 |
307 } | 342 asm volatile ( |
308 | 343 "mov %[n], %[n], lsr #2\n\t" |
309 /* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */ | 344 "1:\n\t" |
310 static __attribute__ ((noinline)) void stereo_to_mono_int16(int16_t *d, const int16_t *s, unsigned n) { | 345 "vld2.32 {q0,q1}, [%[src]]!\n\t" |
311 unsigned int i; | 346 "vadd.f32 q0, q0, q1\n\t" |
312 | 347 "subs %[n], %[n], #1\n\t" |
313 for (i = 0; i < n/8; i++) { | 348 "vst1.32 {q0}, [%[dst]]!\n\t" |
314 int16x8x2_t stereo = vld2q_s16(s); | 349 "bgt 1b\n\t" |
315 int16x8_t mono = vaddq_s16(stereo.val[0], stereo.val[1]); | 350 // output operands (or input operands that get modified) |
316 vst1q_s16(d, mono); | 351 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) |
317 s += 16; | 352 : // input operands |
318 d += 8; | 353 : "memory", "cc" // clobber list |
319 } | 354 ); |
320 for (i = n & ~7; i < n; i++) { | 355 |
321 d[0] = s[0] + s[1]; | 356 while (i--) { |
322 s += 2; | 357 dst[0] = src[0] + src[1]; |
323 d++; | 358 src += 2; |
359 dst++; | |
360 } | |
361 } | |
362 | |
363 static void stereo_to_mono_int16_neon(int16_t *dst, const int16_t *src, unsigned n) { | |
364 int i = n & 7; | |
365 | |
366 asm volatile ( | |
367 "mov %[n], %[n], lsr #3\n\t" | |
368 "1:\n\t" | |
369 "vld2.16 {q0,q1}, [%[src]]!\n\t" | |
370 "vadd.s16 q0, q0, q1\n\t" | |
371 "subs %[n], %[n], #1\n\t" | |
372 "vst1.16 {q0}, [%[dst]]!\n\t" | |
373 "bgt 1b\n\t" | |
374 // output operands (or input operands that get modified) | |
375 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) | |
376 : // input operands | |
377 : "memory", "cc" // clobber list | |
378 ); | |
379 | |
380 while (i--) { | |
381 dst[0] = src[0] + src[1]; | |
382 src += 2; | |
383 dst++; | |
324 } | 384 } |
325 } | 385 } |
326 | 386 |
327 static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { | 387 static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { |
328 switch (*m->format) { | 388 switch (*m->format) { |
329 case PA_SAMPLE_FLOAT32NE: | 389 case PA_SAMPLE_FLOAT32NE: |
330 stereo_to_mono_float(dst, src, n); | 390 stereo_to_mono_float_neon(dst, src, n); |
331 break; | 391 break; |
332 case PA_SAMPLE_S16NE: | 392 case PA_SAMPLE_S16NE: |
333 stereo_to_mono_int16(dst, src, n); | 393 stereo_to_mono_int16_neon(dst, src, n); |
334 break; | 394 break; |
335 default: | 395 default: |
336 pa_assert_not_reached(); | 396 pa_assert_not_reached(); |
337 } | 397 } |
338 } | 398 } |
399 | |
339 #define SAMPLES 1019 | 400 #define SAMPLES 1019 |
340 #define TIMES 10000 | 401 #define TIMES 500000 |
341 | 402 |
342 static void run_test_mono_to_stereo_float(void) { | 403 static void run_test_mono_to_stereo_float(void) { |
343 float stereo[2*SAMPLES]; | 404 float stereo_a9[2*SAMPLES]; |
405 float stereo_a8[2*SAMPLES]; | |
344 float stereo_ref[2*SAMPLES]; | 406 float stereo_ref[2*SAMPLES]; |
345 float stereo_gen[2*SAMPLES]; | 407 float stereo_gen[2*SAMPLES]; |
346 float mono[SAMPLES]; | 408 float mono[SAMPLES]; |
347 int i; | 409 int i; |
348 pa_usec_t start, stop; | 410 pa_usec_t start, stop; |
349 pa_sample_format_t sf; | 411 pa_sample_format_t sf; |
350 pa_sample_spec iss, oss; | 412 pa_sample_spec iss, oss; |
351 pa_remap_t remap; | 413 pa_remap_t remap; |
352 | 414 |
353 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); | 415 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); |
354 | 416 |
355 memset(stereo_ref, 0, sizeof(stereo_ref)); | 417 memset(stereo_ref, 0, sizeof(stereo_ref)); |
356 memset(stereo, 0, sizeof(stereo)); | 418 memset(stereo_gen, 0, sizeof(stereo_gen)); |
419 memset(stereo_a9, 0, sizeof(stereo_a9)); | |
420 memset(stereo_a8, 0, sizeof(stereo_a8)); | |
357 | 421 |
358 for (i = 0; i < SAMPLES; i++) { | 422 for (i = 0; i < SAMPLES; i++) { |
359 mono[i] = rand()/(float) RAND_MAX - 0.5f; | 423 mono[i] = rand()/(float) RAND_MAX - 0.5f; |
360 } | 424 } |
361 | 425 |
368 remap.i_ss = &iss; | 432 remap.i_ss = &iss; |
369 remap.o_ss = &oss; | 433 remap.o_ss = &oss; |
370 remap.map_table_f[0][0] = 1.0; | 434 remap.map_table_f[0][0] = 1.0; |
371 remap.map_table_f[1][0] = 1.0; | 435 remap.map_table_f[1][0] = 1.0; |
372 | 436 |
437 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); | |
438 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
373 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 439 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
374 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 440 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
375 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | |
376 | 441 |
377 for (i = 0; i < 2*SAMPLES; i++) { | 442 for (i = 0; i < 2*SAMPLES; i++) { |
378 if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) { | 443 if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) { |
379 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], | 444 pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i], |
380 mono[i/2]); | 445 mono[i/2]); |
381 } | 446 } |
382 } | 447 } |
383 for (i = 0; i < 2*SAMPLES; i++) { | 448 for (i = 0; i < 2*SAMPLES; i++) { |
384 if (fabsf(stereo[i] - stereo_gen[i]) > 0.00001) { | 449 if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) { |
385 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_gen[i], | 450 pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i], |
386 mono[i/2]); | 451 mono[i/2]); |
387 } | 452 } |
388 } | 453 } |
389 | 454 for (i = 0; i < 2*SAMPLES; i++) { |
390 start = pa_rtclock_now(); | 455 if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) { |
391 for (i = 0; i < TIMES; i++) { | 456 pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i], |
392 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | 457 mono[i/2]); |
393 } | 458 } |
394 stop = pa_rtclock_now(); | 459 } |
395 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
396 | 460 |
397 start = pa_rtclock_now(); | 461 start = pa_rtclock_now(); |
398 for (i = 0; i < TIMES; i++) { | 462 for (i = 0; i < TIMES; i++) { |
399 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 463 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
400 } | 464 } |
401 stop = pa_rtclock_now(); | 465 stop = pa_rtclock_now(); |
402 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 466 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
467 | |
468 start = pa_rtclock_now(); | |
469 for (i = 0; i < TIMES; i++) { | |
470 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); | |
471 } | |
472 stop = pa_rtclock_now(); | |
473 pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); | |
474 | |
475 start = pa_rtclock_now(); | |
476 for (i = 0; i < TIMES; i++) { | |
477 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
478 } | |
479 stop = pa_rtclock_now(); | |
480 pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); | |
403 | 481 |
404 start = pa_rtclock_now(); | 482 start = pa_rtclock_now(); |
405 for (i = 0; i < TIMES; i++) { | 483 for (i = 0; i < TIMES; i++) { |
406 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 484 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
407 } | 485 } |
408 stop = pa_rtclock_now(); | 486 stop = pa_rtclock_now(); |
409 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 487 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
410 } | 488 } |
411 | 489 |
412 static void run_test_stereo_to_mono_float(void) { | 490 static void run_test_stereo_to_mono_float(void) { |
413 float stereo[2*SAMPLES]; | 491 float stereo[2*SAMPLES]; |
414 float mono_ref[SAMPLES]; | 492 float mono_ref[SAMPLES]; |
454 start = pa_rtclock_now(); | 532 start = pa_rtclock_now(); |
455 for (i = 0; i < TIMES; i++) { | 533 for (i = 0; i < TIMES; i++) { |
456 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); | 534 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); |
457 } | 535 } |
458 stop = pa_rtclock_now(); | 536 stop = pa_rtclock_now(); |
459 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 537 pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
460 | 538 |
461 start = pa_rtclock_now(); | 539 start = pa_rtclock_now(); |
462 for (i = 0; i < TIMES; i++) { | 540 for (i = 0; i < TIMES; i++) { |
463 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); | 541 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); |
464 } | 542 } |
465 stop = pa_rtclock_now(); | 543 stop = pa_rtclock_now(); |
466 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 544 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
467 | 545 |
468 start = pa_rtclock_now(); | 546 start = pa_rtclock_now(); |
469 for (i = 0; i < TIMES; i++) { | 547 for (i = 0; i < TIMES; i++) { |
470 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); | 548 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); |
471 } | 549 } |
472 stop = pa_rtclock_now(); | 550 stop = pa_rtclock_now(); |
473 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 551 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
474 } | 552 } |
475 | 553 |
476 static void run_test_mono_to_stereo_s16(void) { | 554 static void run_test_mono_to_stereo_s16(void) { |
477 int16_t stereo[2*SAMPLES]; | 555 int16_t stereo_a9[2*SAMPLES]; |
556 int16_t stereo_a8[2*SAMPLES]; | |
478 int16_t stereo_ref[2*SAMPLES]; | 557 int16_t stereo_ref[2*SAMPLES]; |
479 int16_t stereo_gen[2*SAMPLES]; | 558 int16_t stereo_gen[2*SAMPLES]; |
480 int16_t mono[SAMPLES]; | 559 int16_t mono[SAMPLES]; |
481 int i; | 560 int i; |
482 pa_usec_t start, stop; | 561 pa_usec_t start, stop; |
485 pa_remap_t remap; | 564 pa_remap_t remap; |
486 | 565 |
487 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); | 566 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); |
488 | 567 |
489 memset(stereo_ref, 0, sizeof(stereo_ref)); | 568 memset(stereo_ref, 0, sizeof(stereo_ref)); |
490 memset(stereo, 0, sizeof(stereo)); | 569 memset(stereo_a9, 0, sizeof(stereo_a9)); |
570 memset(stereo_a8, 0, sizeof(stereo_a8)); | |
571 memset(stereo_gen, 0, sizeof(stereo_gen)); | |
491 | 572 |
492 for (i = 0; i < SAMPLES; i++) { | 573 for (i = 0; i < SAMPLES; i++) { |
493 mono[i] = rand() - RAND_MAX/2; | 574 mono[i] = rand() - RAND_MAX/2; |
494 } | 575 } |
495 | 576 |
499 iss.channels = 1; | 580 iss.channels = 1; |
500 oss.format = PA_SAMPLE_S16NE; | 581 oss.format = PA_SAMPLE_S16NE; |
501 oss.channels = 2; | 582 oss.channels = 2; |
502 remap.i_ss = &iss; | 583 remap.i_ss = &iss; |
503 remap.o_ss = &oss; | 584 remap.o_ss = &oss; |
504 remap.map_table_f[0][0] = 1.0; | 585 remap.map_table_i[0][0] = 0x10000; |
505 remap.map_table_f[1][0] = 1.0; | 586 remap.map_table_i[1][0] = 0x10000; |
506 | 587 |
507 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 588 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
508 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 589 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
509 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | 590 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); |
591 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
510 | 592 |
511 for (i = 0; i < 2*SAMPLES; i++) { | 593 for (i = 0; i < 2*SAMPLES; i++) { |
512 if (abs(stereo[i] - stereo_ref[i]) > 0) { | 594 if (abs(stereo_a9[i] - stereo_ref[i]) > 0) { |
513 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], | 595 pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i], |
514 mono[i/2]); | 596 mono[i/2]); |
515 } | 597 } |
516 } | 598 } |
517 | |
518 for (i = 0; i < 2*SAMPLES; i++) { | 599 for (i = 0; i < 2*SAMPLES; i++) { |
519 if (abs(stereo[i] - stereo_gen[i]) > 0) { | 600 if (abs(stereo_a8[i] - stereo_ref[i]) > 0) { |
520 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_gen[i], | 601 pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i], |
521 mono[i/2]); | 602 mono[i/2]); |
522 } | 603 } |
523 } | 604 } |
524 | 605 |
525 start = pa_rtclock_now(); | 606 for (i = 0; i < 2*SAMPLES; i++) { |
526 for (i = 0; i < TIMES; i++) { | 607 if (abs(stereo_gen[i] - stereo_ref[i]) > 0) { |
527 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); | 608 pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i], |
528 } | 609 mono[i/2]); |
529 stop = pa_rtclock_now(); | 610 } |
530 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 611 } |
612 | |
613 start = pa_rtclock_now(); | |
614 for (i = 0; i < TIMES; i++) { | |
615 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES); | |
616 } | |
617 stop = pa_rtclock_now(); | |
618 pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start)); | |
619 | |
620 start = pa_rtclock_now(); | |
621 for (i = 0; i < TIMES; i++) { | |
622 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES); | |
623 } | |
624 stop = pa_rtclock_now(); | |
625 pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start)); | |
531 | 626 |
532 start = pa_rtclock_now(); | 627 start = pa_rtclock_now(); |
533 for (i = 0; i < TIMES; i++) { | 628 for (i = 0; i < TIMES; i++) { |
534 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); | 629 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); |
535 } | 630 } |
536 stop = pa_rtclock_now(); | 631 stop = pa_rtclock_now(); |
537 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 632 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
538 | 633 |
539 start = pa_rtclock_now(); | 634 start = pa_rtclock_now(); |
540 for (i = 0; i < TIMES; i++) { | 635 for (i = 0; i < TIMES; i++) { |
541 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); | 636 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); |
542 } | 637 } |
543 stop = pa_rtclock_now(); | 638 stop = pa_rtclock_now(); |
544 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 639 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
545 } | 640 } |
546 | 641 |
547 static void run_test_stereo_to_mono_s16(void) { | 642 static void run_test_stereo_to_mono_s16(void) { |
548 int16_t stereo[2*SAMPLES]; | 643 int16_t stereo[2*SAMPLES]; |
549 int16_t mono_ref[SAMPLES]; | 644 int16_t mono_ref[SAMPLES]; |
556 pa_remap_t remap; | 651 pa_remap_t remap; |
557 | 652 |
558 pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES); | 653 pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES); |
559 | 654 |
560 memset(mono_ref, 0, sizeof(mono_ref)); | 655 memset(mono_ref, 0, sizeof(mono_ref)); |
656 memset(mono_gen, 0, sizeof(mono_gen)); | |
561 memset(mono, 0, sizeof(mono)); | 657 memset(mono, 0, sizeof(mono)); |
562 | 658 |
563 for (i = 0; i < 2*SAMPLES; i++) { | 659 for (i = 0; i < 2*SAMPLES; i++) { |
564 stereo[i] = rand() - RAND_MAX/2; | 660 stereo[i] = rand() - RAND_MAX/2; |
565 } | 661 } |
570 iss.channels = 2; | 666 iss.channels = 2; |
571 oss.format = PA_SAMPLE_S16NE; | 667 oss.format = PA_SAMPLE_S16NE; |
572 oss.channels = 1; | 668 oss.channels = 1; |
573 remap.i_ss = &iss; | 669 remap.i_ss = &iss; |
574 remap.o_ss = &oss; | 670 remap.o_ss = &oss; |
575 remap.map_table_f[0][0] = 1.0; | 671 remap.map_table_i[0][0] = 0x10000; |
576 remap.map_table_f[0][1] = 1.0; | 672 remap.map_table_i[0][1] = 0x10000; |
577 | 673 |
578 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); | 674 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); |
579 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); | 675 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); |
580 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); | 676 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); |
581 | 677 |
595 start = pa_rtclock_now(); | 691 start = pa_rtclock_now(); |
596 for (i = 0; i < TIMES; i++) { | 692 for (i = 0; i < TIMES; i++) { |
597 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); | 693 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); |
598 } | 694 } |
599 stop = pa_rtclock_now(); | 695 stop = pa_rtclock_now(); |
600 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 696 pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
601 | 697 |
602 start = pa_rtclock_now(); | 698 start = pa_rtclock_now(); |
603 for (i = 0; i < TIMES; i++) { | 699 for (i = 0; i < TIMES; i++) { |
604 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); | 700 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); |
605 } | 701 } |
606 stop = pa_rtclock_now(); | 702 stop = pa_rtclock_now(); |
607 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 703 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start)); |
608 | 704 |
609 start = pa_rtclock_now(); | 705 start = pa_rtclock_now(); |
610 for (i = 0; i < TIMES; i++) { | 706 for (i = 0; i < TIMES; i++) { |
611 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); | 707 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); |
612 } | 708 } |
613 stop = pa_rtclock_now(); | 709 stop = pa_rtclock_now(); |
614 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); | 710 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start)); |
615 } | 711 } |
616 | |
617 | 712 |
618 #endif /* defined(__arm__) */ | 713 #endif /* defined(__arm__) */ |
619 | 714 |
620 int main() { | 715 int main() { |
621 | |
622 run_test_stereo_to_mono_float(); | 716 run_test_stereo_to_mono_float(); |
623 run_test_stereo_to_mono_s16(); | 717 run_test_stereo_to_mono_s16(); |
624 | 718 |
625 run_test_mono_to_stereo_float(); | 719 run_test_mono_to_stereo_float(); |
626 run_test_mono_to_stereo_s16(); | 720 run_test_mono_to_stereo_s16(); |
627 | 721 |
628 | |
629 return EXIT_SUCCESS; | 722 return EXIT_SUCCESS; |
630 } | 723 } |