comparison remap_neon.c @ 2:09ee6a01a3d3

new
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Wed, 04 Jul 2012 15:24:08 +0200
parents b829afbea564
children e889fd0e7769
comparison
equal deleted inserted replaced
1:b829afbea564 2:09ee6a01a3d3
186 default: 186 default:
187 pa_assert_not_reached(); 187 pa_assert_not_reached();
188 } 188 }
189 } 189 }
190 190
191
192
193 static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) { 191 static void remap_stereo_to_mono_c(pa_remap_t *m, void *dst, const void *src, unsigned n) {
194 unsigned i; 192 unsigned i;
195 193
196 switch (*m->format) { 194 switch (*m->format) {
197 case PA_SAMPLE_FLOAT32NE: 195 case PA_SAMPLE_FLOAT32NE:
233 default: 231 default:
234 pa_assert_not_reached(); 232 pa_assert_not_reached();
235 } 233 }
236 } 234 }
237 235
238
239 #if defined(__arm__) 236 #if defined(__arm__)
240 237
241 #include "arm_neon.h" 238 #include "arm_neon.h"
242 239
243 void remap_mono_to_stereo_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { 240 static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) {
244 unsigned i; 241 int i = n & 3;
242
243 asm volatile (
244 "mov %[n], %[n], lsr #2\n\t"
245 "1:\n\t"
246 "vld1.32 {q0}, [%[src]]!\n\t"
247 "vmov q1, q0\n\t"
248 "subs %[n], %[n], #1\n\t"
249 "vst2.32 {q0,q1}, [%[dst]]!\n\t"
250 "bgt 1b\n\t"
251 // output operands (or input operands that get modified)
252 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
253 : // input operands
254 : "memory", "cc" // clobber list
255 );
256
257 while (i--) {
258 dst[0] = dst[1] = src[0];
259 src++;
260 dst += 2;
261 }
262 }
263
264 static void mono_to_stereo_float_neon_a9(float *dst, const float *src, unsigned n) {
265 int i = n & 1;
266
267 asm volatile (
268 "mov %[n], %[n], lsr #1\n\t"
269 "1:\n\t"
270 "ldm %[src]!, {r4,r6}\n\t"
271 "mov r5, r4\n\t"
272 "mov r7, r6\n\t"
273 "subs %[n], %[n], #1\n\t"
274 "stm %[dst]!, {r4-r7}\n\t"
275 "bgt 1b\n\t"
276 // output operands (or input operands that get modified)
277 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
278 : // input operands
279 : "memory", "cc", "r4", "r5", "r6", "r7" // clobber list
280 );
281
282 while (i--) {
283 dst[0] = dst[1] = src[0];
284 src++;
285 dst += 2;
286 }
287 }
288
289 static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) {
290 int i = n & 7;
291
292 asm volatile (
293 "mov %[n], %[n], lsr #3\n\t"
294 "1:\n\t"
295 "vld1.16 {q0}, [%[src]]!\n\t"
296 "vmov q1, q0\n\t"
297 "subs %[n], %[n], #1\n\t"
298 "vst2.16 {q0,q1}, [%[dst]]!\n\t"
299 "bgt 1b\n\t"
300 // output operands (or input operands that get modified)
301 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
302 : // input operands
303 : "memory", "cc" // clobber list
304 );
305
306 while (i--) {
307 dst[0] = dst[1] = src[0];
308 src++;
309 dst += 2;
310 }
311 }
312
313 static void remap_mono_to_stereo_neon_a9(pa_remap_t *m, void *dst, const void *src, unsigned n) {
245 switch (*m->format) { 314 switch (*m->format) {
246 case PA_SAMPLE_FLOAT32NE: 315 case PA_SAMPLE_FLOAT32NE:
247 { 316 mono_to_stereo_float_neon_a9(dst, src, n);
248 float *d = (float *) dst, *s = (float *) src; 317 break;
249
250 for (i = 0; i < n/4; i++) {
251 float32x4x2_t stereo;
252 stereo.val[0] = vld1q_f32(s);
253 stereo.val[1] = stereo.val[0];
254 vst2q_f32(d, stereo);
255 s += 4;
256 d += 8;
257 }
258
259 for (i = n & ~3; i < n; i++) {
260 d[0] = d[1] = s[0];
261 s++;
262 d += 2;
263 }
264 break;
265 }
266 case PA_SAMPLE_S16NE: 318 case PA_SAMPLE_S16NE:
267 { 319 mono_to_stereo_int16_neon(dst, src, n);
268 int16_t *d = (int16_t *) dst, *s = (int16_t *) src; 320 break;
269
270 for (i = 0; i < n/8; i++) {
271 int16x8x2_t stereo;
272 stereo.val[0] = vld1q_s16(s);
273 stereo.val[1] = stereo.val[0];
274 vst2q_s16(d, stereo);
275 s += 8;
276 d += 16;
277 }
278
279 for (i = n & ~7; i < n; i++) {
280 d[0] = d[1] = s[0];
281 s++;
282 d += 2;
283 }
284 break;
285 }
286 default: 321 default:
287 pa_assert_not_reached(); 322 pa_assert_not_reached();
288 } 323 }
289 } 324 }
290 325
291 /* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */ 326 static void remap_mono_to_stereo_neon_a8(pa_remap_t *m, void *dst, const void *src, unsigned n) {
292 static __attribute__ ((noinline)) void stereo_to_mono_float(float *d, const float *s, unsigned n) { 327 switch (*m->format) {
293 unsigned i; 328 case PA_SAMPLE_FLOAT32NE:
294 329 mono_to_stereo_float_neon_a8(dst, src, n);
295 for (i = 0; i < n/4; i++) { 330 break;
296 float32x4x2_t stereo = vld2q_f32(s); 331 case PA_SAMPLE_S16NE:
297 float32x4_t mono = vaddq_f32(stereo.val[0], stereo.val[1]); 332 mono_to_stereo_int16_neon(dst, src, n);
298 vst1q_f32(d, mono); 333 break;
299 s += 8; 334 default:
300 d += 4; 335 pa_assert_not_reached();
301 } 336 }
302 for (i = n & ~3; i < n; i++) { 337 }
303 d[0] = s[0] + s[1]; 338
304 s += 2; 339 static void stereo_to_mono_float_neon(float *dst, const float *src, unsigned n) {
305 d++; 340 int i = n & 3;
306 } 341
307 } 342 asm volatile (
308 343 "mov %[n], %[n], lsr #2\n\t"
309 /* don't inline, causes ICE, see https://bugs.launchpad.net/bugs/936863 */ 344 "1:\n\t"
310 static __attribute__ ((noinline)) void stereo_to_mono_int16(int16_t *d, const int16_t *s, unsigned n) { 345 "vld2.32 {q0,q1}, [%[src]]!\n\t"
311 unsigned int i; 346 "vadd.f32 q0, q0, q1\n\t"
312 347 "subs %[n], %[n], #1\n\t"
313 for (i = 0; i < n/8; i++) { 348 "vst1.32 {q0}, [%[dst]]!\n\t"
314 int16x8x2_t stereo = vld2q_s16(s); 349 "bgt 1b\n\t"
315 int16x8_t mono = vaddq_s16(stereo.val[0], stereo.val[1]); 350 // output operands (or input operands that get modified)
316 vst1q_s16(d, mono); 351 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
317 s += 16; 352 : // input operands
318 d += 8; 353 : "memory", "cc" // clobber list
319 } 354 );
320 for (i = n & ~7; i < n; i++) { 355
321 d[0] = s[0] + s[1]; 356 while (i--) {
322 s += 2; 357 dst[0] = src[0] + src[1];
323 d++; 358 src += 2;
359 dst++;
360 }
361 }
362
363 static void stereo_to_mono_int16_neon(int16_t *dst, const int16_t *src, unsigned n) {
364 int i = n & 7;
365
366 asm volatile (
367 "mov %[n], %[n], lsr #3\n\t"
368 "1:\n\t"
369 "vld2.16 {q0,q1}, [%[src]]!\n\t"
370 "vadd.s16 q0, q0, q1\n\t"
371 "subs %[n], %[n], #1\n\t"
372 "vst1.16 {q0}, [%[dst]]!\n\t"
373 "bgt 1b\n\t"
374 // output operands (or input operands that get modified)
375 : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
376 : // input operands
377 : "memory", "cc" // clobber list
378 );
379
380 while (i--) {
381 dst[0] = src[0] + src[1];
382 src += 2;
383 dst++;
324 } 384 }
325 } 385 }
326 386
327 static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) { 387 static void remap_stereo_to_mono_neon(pa_remap_t *m, void *dst, const void *src, unsigned n) {
328 switch (*m->format) { 388 switch (*m->format) {
329 case PA_SAMPLE_FLOAT32NE: 389 case PA_SAMPLE_FLOAT32NE:
330 stereo_to_mono_float(dst, src, n); 390 stereo_to_mono_float_neon(dst, src, n);
331 break; 391 break;
332 case PA_SAMPLE_S16NE: 392 case PA_SAMPLE_S16NE:
333 stereo_to_mono_int16(dst, src, n); 393 stereo_to_mono_int16_neon(dst, src, n);
334 break; 394 break;
335 default: 395 default:
336 pa_assert_not_reached(); 396 pa_assert_not_reached();
337 } 397 }
338 } 398 }
399
339 #define SAMPLES 1019 400 #define SAMPLES 1019
340 #define TIMES 10000 401 #define TIMES 500000
341 402
342 static void run_test_mono_to_stereo_float(void) { 403 static void run_test_mono_to_stereo_float(void) {
343 float stereo[2*SAMPLES]; 404 float stereo_a9[2*SAMPLES];
405 float stereo_a8[2*SAMPLES];
344 float stereo_ref[2*SAMPLES]; 406 float stereo_ref[2*SAMPLES];
345 float stereo_gen[2*SAMPLES]; 407 float stereo_gen[2*SAMPLES];
346 float mono[SAMPLES]; 408 float mono[SAMPLES];
347 int i; 409 int i;
348 pa_usec_t start, stop; 410 pa_usec_t start, stop;
349 pa_sample_format_t sf; 411 pa_sample_format_t sf;
350 pa_sample_spec iss, oss; 412 pa_sample_spec iss, oss;
351 pa_remap_t remap; 413 pa_remap_t remap;
352 414
353 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES); 415 pa_log_debug("checking NEON remap_mono_to_stereo(float, %d)", SAMPLES);
354 416
355 memset(stereo_ref, 0, sizeof(stereo_ref)); 417 memset(stereo_ref, 0, sizeof(stereo_ref));
356 memset(stereo, 0, sizeof(stereo)); 418 memset(stereo_gen, 0, sizeof(stereo_gen));
419 memset(stereo_a9, 0, sizeof(stereo_a9));
420 memset(stereo_a8, 0, sizeof(stereo_a8));
357 421
358 for (i = 0; i < SAMPLES; i++) { 422 for (i = 0; i < SAMPLES; i++) {
359 mono[i] = rand()/(float) RAND_MAX - 0.5f; 423 mono[i] = rand()/(float) RAND_MAX - 0.5f;
360 } 424 }
361 425
368 remap.i_ss = &iss; 432 remap.i_ss = &iss;
369 remap.o_ss = &oss; 433 remap.o_ss = &oss;
370 remap.map_table_f[0][0] = 1.0; 434 remap.map_table_f[0][0] = 1.0;
371 remap.map_table_f[1][0] = 1.0; 435 remap.map_table_f[1][0] = 1.0;
372 436
437 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
438 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
373 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); 439 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
374 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); 440 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
375 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES);
376 441
377 for (i = 0; i < 2*SAMPLES; i++) { 442 for (i = 0; i < 2*SAMPLES; i++) {
378 if (fabsf(stereo[i] - stereo_ref[i]) > 0.00001) { 443 if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) {
379 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_ref[i], 444 pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i],
380 mono[i/2]); 445 mono[i/2]);
381 } 446 }
382 } 447 }
383 for (i = 0; i < 2*SAMPLES; i++) { 448 for (i = 0; i < 2*SAMPLES; i++) {
384 if (fabsf(stereo[i] - stereo_gen[i]) > 0.00001) { 449 if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) {
385 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, stereo[i], stereo_gen[i], 450 pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i],
386 mono[i/2]); 451 mono[i/2]);
387 } 452 }
388 } 453 }
389 454 for (i = 0; i < 2*SAMPLES; i++) {
390 start = pa_rtclock_now(); 455 if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) {
391 for (i = 0; i < TIMES; i++) { 456 pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i],
392 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); 457 mono[i/2]);
393 } 458 }
394 stop = pa_rtclock_now(); 459 }
395 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
396 460
397 start = pa_rtclock_now(); 461 start = pa_rtclock_now();
398 for (i = 0; i < TIMES; i++) { 462 for (i = 0; i < TIMES; i++) {
399 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); 463 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
400 } 464 }
401 stop = pa_rtclock_now(); 465 stop = pa_rtclock_now();
402 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 466 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
467
468 start = pa_rtclock_now();
469 for (i = 0; i < TIMES; i++) {
470 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
471 }
472 stop = pa_rtclock_now();
473 pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start));
474
475 start = pa_rtclock_now();
476 for (i = 0; i < TIMES; i++) {
477 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
478 }
479 stop = pa_rtclock_now();
480 pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start));
403 481
404 start = pa_rtclock_now(); 482 start = pa_rtclock_now();
405 for (i = 0; i < TIMES; i++) { 483 for (i = 0; i < TIMES; i++) {
406 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); 484 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
407 } 485 }
408 stop = pa_rtclock_now(); 486 stop = pa_rtclock_now();
409 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); 487 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
410 } 488 }
411 489
412 static void run_test_stereo_to_mono_float(void) { 490 static void run_test_stereo_to_mono_float(void) {
413 float stereo[2*SAMPLES]; 491 float stereo[2*SAMPLES];
414 float mono_ref[SAMPLES]; 492 float mono_ref[SAMPLES];
454 start = pa_rtclock_now(); 532 start = pa_rtclock_now();
455 for (i = 0; i < TIMES; i++) { 533 for (i = 0; i < TIMES; i++) {
456 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); 534 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
457 } 535 }
458 stop = pa_rtclock_now(); 536 stop = pa_rtclock_now();
459 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 537 pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start));
460 538
461 start = pa_rtclock_now(); 539 start = pa_rtclock_now();
462 for (i = 0; i < TIMES; i++) { 540 for (i = 0; i < TIMES; i++) {
463 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); 541 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
464 } 542 }
465 stop = pa_rtclock_now(); 543 stop = pa_rtclock_now();
466 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 544 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
467 545
468 start = pa_rtclock_now(); 546 start = pa_rtclock_now();
469 for (i = 0; i < TIMES; i++) { 547 for (i = 0; i < TIMES; i++) {
470 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); 548 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
471 } 549 }
472 stop = pa_rtclock_now(); 550 stop = pa_rtclock_now();
473 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); 551 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
474 } 552 }
475 553
476 static void run_test_mono_to_stereo_s16(void) { 554 static void run_test_mono_to_stereo_s16(void) {
477 int16_t stereo[2*SAMPLES]; 555 int16_t stereo_a9[2*SAMPLES];
556 int16_t stereo_a8[2*SAMPLES];
478 int16_t stereo_ref[2*SAMPLES]; 557 int16_t stereo_ref[2*SAMPLES];
479 int16_t stereo_gen[2*SAMPLES]; 558 int16_t stereo_gen[2*SAMPLES];
480 int16_t mono[SAMPLES]; 559 int16_t mono[SAMPLES];
481 int i; 560 int i;
482 pa_usec_t start, stop; 561 pa_usec_t start, stop;
485 pa_remap_t remap; 564 pa_remap_t remap;
486 565
487 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES); 566 pa_log_debug("checking NEON remap_mono_to_stereo(s16, %d)", SAMPLES);
488 567
489 memset(stereo_ref, 0, sizeof(stereo_ref)); 568 memset(stereo_ref, 0, sizeof(stereo_ref));
490 memset(stereo, 0, sizeof(stereo)); 569 memset(stereo_a9, 0, sizeof(stereo_a9));
570 memset(stereo_a8, 0, sizeof(stereo_a8));
571 memset(stereo_gen, 0, sizeof(stereo_gen));
491 572
492 for (i = 0; i < SAMPLES; i++) { 573 for (i = 0; i < SAMPLES; i++) {
493 mono[i] = rand() - RAND_MAX/2; 574 mono[i] = rand() - RAND_MAX/2;
494 } 575 }
495 576
499 iss.channels = 1; 580 iss.channels = 1;
500 oss.format = PA_SAMPLE_S16NE; 581 oss.format = PA_SAMPLE_S16NE;
501 oss.channels = 2; 582 oss.channels = 2;
502 remap.i_ss = &iss; 583 remap.i_ss = &iss;
503 remap.o_ss = &oss; 584 remap.o_ss = &oss;
504 remap.map_table_f[0][0] = 1.0; 585 remap.map_table_i[0][0] = 0x10000;
505 remap.map_table_f[1][0] = 1.0; 586 remap.map_table_i[1][0] = 0x10000;
506 587
507 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); 588 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
508 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); 589 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
509 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); 590 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
591 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
510 592
511 for (i = 0; i < 2*SAMPLES; i++) { 593 for (i = 0; i < 2*SAMPLES; i++) {
512 if (abs(stereo[i] - stereo_ref[i]) > 0) { 594 if (abs(stereo_a9[i] - stereo_ref[i]) > 0) {
513 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_ref[i], 595 pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i],
514 mono[i/2]); 596 mono[i/2]);
515 } 597 }
516 } 598 }
517
518 for (i = 0; i < 2*SAMPLES; i++) { 599 for (i = 0; i < 2*SAMPLES; i++) {
519 if (abs(stereo[i] - stereo_gen[i]) > 0) { 600 if (abs(stereo_a8[i] - stereo_ref[i]) > 0) {
520 pa_log_debug("%d: %d != %d (%d)", i, stereo[i], stereo_gen[i], 601 pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i],
521 mono[i/2]); 602 mono[i/2]);
522 } 603 }
523 } 604 }
524 605
525 start = pa_rtclock_now(); 606 for (i = 0; i < 2*SAMPLES; i++) {
526 for (i = 0; i < TIMES; i++) { 607 if (abs(stereo_gen[i] - stereo_ref[i]) > 0) {
527 remap_mono_to_stereo_neon(&remap, stereo, mono, SAMPLES); 608 pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i],
528 } 609 mono[i/2]);
529 stop = pa_rtclock_now(); 610 }
530 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 611 }
612
613 start = pa_rtclock_now();
614 for (i = 0; i < TIMES; i++) {
615 remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
616 }
617 stop = pa_rtclock_now();
618 pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start));
619
620 start = pa_rtclock_now();
621 for (i = 0; i < TIMES; i++) {
622 remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
623 }
624 stop = pa_rtclock_now();
625 pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start));
531 626
532 start = pa_rtclock_now(); 627 start = pa_rtclock_now();
533 for (i = 0; i < TIMES; i++) { 628 for (i = 0; i < TIMES; i++) {
534 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES); 629 remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
535 } 630 }
536 stop = pa_rtclock_now(); 631 stop = pa_rtclock_now();
537 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 632 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
538 633
539 start = pa_rtclock_now(); 634 start = pa_rtclock_now();
540 for (i = 0; i < TIMES; i++) { 635 for (i = 0; i < TIMES; i++) {
541 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES); 636 remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
542 } 637 }
543 stop = pa_rtclock_now(); 638 stop = pa_rtclock_now();
544 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); 639 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
545 } 640 }
546 641
547 static void run_test_stereo_to_mono_s16(void) { 642 static void run_test_stereo_to_mono_s16(void) {
548 int16_t stereo[2*SAMPLES]; 643 int16_t stereo[2*SAMPLES];
549 int16_t mono_ref[SAMPLES]; 644 int16_t mono_ref[SAMPLES];
556 pa_remap_t remap; 651 pa_remap_t remap;
557 652
558 pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES); 653 pa_log_debug("checking NEON remap_stereo_to_mono(s16, %d)", SAMPLES);
559 654
560 memset(mono_ref, 0, sizeof(mono_ref)); 655 memset(mono_ref, 0, sizeof(mono_ref));
656 memset(mono_gen, 0, sizeof(mono_gen));
561 memset(mono, 0, sizeof(mono)); 657 memset(mono, 0, sizeof(mono));
562 658
563 for (i = 0; i < 2*SAMPLES; i++) { 659 for (i = 0; i < 2*SAMPLES; i++) {
564 stereo[i] = rand() - RAND_MAX/2; 660 stereo[i] = rand() - RAND_MAX/2;
565 } 661 }
570 iss.channels = 2; 666 iss.channels = 2;
571 oss.format = PA_SAMPLE_S16NE; 667 oss.format = PA_SAMPLE_S16NE;
572 oss.channels = 1; 668 oss.channels = 1;
573 remap.i_ss = &iss; 669 remap.i_ss = &iss;
574 remap.o_ss = &oss; 670 remap.o_ss = &oss;
575 remap.map_table_f[0][0] = 1.0; 671 remap.map_table_i[0][0] = 0x10000;
576 remap.map_table_f[0][1] = 1.0; 672 remap.map_table_i[0][1] = 0x10000;
577 673
578 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); 674 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
579 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); 675 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
580 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); 676 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
581 677
595 start = pa_rtclock_now(); 691 start = pa_rtclock_now();
596 for (i = 0; i < TIMES; i++) { 692 for (i = 0; i < TIMES; i++) {
597 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES); 693 remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
598 } 694 }
599 stop = pa_rtclock_now(); 695 stop = pa_rtclock_now();
600 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 696 pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start));
601 697
602 start = pa_rtclock_now(); 698 start = pa_rtclock_now();
603 for (i = 0; i < TIMES; i++) { 699 for (i = 0; i < TIMES; i++) {
604 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES); 700 remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
605 } 701 }
606 stop = pa_rtclock_now(); 702 stop = pa_rtclock_now();
607 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 703 pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
608 704
609 start = pa_rtclock_now(); 705 start = pa_rtclock_now();
610 for (i = 0; i < TIMES; i++) { 706 for (i = 0; i < TIMES; i++) {
611 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES); 707 remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
612 } 708 }
613 stop = pa_rtclock_now(); 709 stop = pa_rtclock_now();
614 pa_log_info("generic: %llu usec.", (long long unsigned int)(stop - start)); 710 pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
615 } 711 }
616
617 712
618 #endif /* defined(__arm__) */ 713 #endif /* defined(__arm__) */
619 714
620 int main() { 715 int main() {
621
622 run_test_stereo_to_mono_float(); 716 run_test_stereo_to_mono_float();
623 run_test_stereo_to_mono_s16(); 717 run_test_stereo_to_mono_s16();
624 718
625 run_test_mono_to_stereo_float(); 719 run_test_mono_to_stereo_float();
626 run_test_mono_to_stereo_s16(); 720 run_test_mono_to_stereo_s16();
627 721
628
629 return EXIT_SUCCESS; 722 return EXIT_SUCCESS;
630 } 723 }

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.