Mercurial > hg > pa-neon
comparison svolume_neon.c @ 5:07763f536182 default tip
ALIGNment support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Sun, 08 Jul 2012 21:48:08 +0200 |
parents | 1f6289166006 |
children |
comparison
equal
deleted
inserted
replaced
4:1f6289166006 | 5:07763f536182 |
---|---|
338 | 338 |
339 #define SAMPLES 1019 | 339 #define SAMPLES 1019 |
340 #define TIMES 50000 | 340 #define TIMES 50000 |
341 #define CHANNELS 4 | 341 #define CHANNELS 4 |
342 #define PADDING 16 | 342 #define PADDING 16 |
| 343 #define ALIGN 1 |
343 | 344 |
344 static void run_test_float(void) { | 345 static void run_test_float(void) { |
345 float floats[SAMPLES]; | 346 float floats[SAMPLES+ALIGN]; |
346 float floats_ref[SAMPLES]; | 347 float floats_ref[SAMPLES+ALIGN]; |
347 float floats_orig[SAMPLES]; | 348 float floats_orig[SAMPLES+ALIGN]; |
348 float volumes[CHANNELS]; | 349 float volumes[CHANNELS]; |
349 unsigned i; | 350 unsigned i; |
350 pa_usec_t start, stop; | 351 pa_usec_t start, stop; |
351 | 352 |
352 pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES); | 353 pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES); |
353 | 354 |
354 for (i = 0; i < SAMPLES; i++) { | 355 for (i = 0; i < SAMPLES+ALIGN; i++) { |
355 floats_orig[i] = rand()/(float) RAND_MAX - 0.5f; | 356 floats_orig[i] = rand()/(float) RAND_MAX - 0.5f; |
356 } | 357 } |
357 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); | 358 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); |
358 memcpy(floats, floats_orig, sizeof(floats_orig)); | 359 memcpy(floats, floats_orig, sizeof(floats_orig)); |
359 | 360 |
360 for (i = 0; i < CHANNELS; i++) | 361 for (i = 0; i < CHANNELS; i++) |
361 volumes[i] = 0.5f * rand() / (float) RAND_MAX; | 362 volumes[i] = 0.5f * rand() / (float) RAND_MAX; |
362 | 363 |
363 pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); | 364 pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats)); |
364 pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); | 365 pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref)); |
365 | 366 |
366 for (i = 0; i < SAMPLES; i++) { | 367 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
367 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { | 368 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { |
368 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i], | 369 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i], |
369 floats_orig[i]); | 370 floats_orig[i]); |
370 } | 371 } |
371 } | 372 } |
372 | 373 |
373 start = pa_rtclock_now(); | 374 start = pa_rtclock_now(); |
374 for (i = 0; i < TIMES; i++) { | 375 for (i = 0; i < TIMES; i++) { |
375 memcpy(floats, floats_orig, sizeof(floats_orig)); | 376 memcpy(floats, floats_orig, sizeof(floats_orig)); |
376 pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); | 377 pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats)); |
377 } | 378 } |
378 stop = pa_rtclock_now(); | 379 stop = pa_rtclock_now(); |
379 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 380 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
380 | 381 |
381 start = pa_rtclock_now(); | 382 start = pa_rtclock_now(); |
382 for (i = 0; i < TIMES; i++) { | 383 for (i = 0; i < TIMES; i++) { |
383 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); | 384 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); |
384 pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); | 385 pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref)); |
385 } | 386 } |
386 stop = pa_rtclock_now(); | 387 stop = pa_rtclock_now(); |
387 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 388 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); |
388 } | 389 } |
389 | 390 |
390 static void run_test_s16(void) { | 391 static void run_test_s16(void) { |
391 int16_t samples[SAMPLES]; | 392 int16_t samples[SAMPLES+ALIGN]; |
392 int16_t samples_ref[SAMPLES]; | 393 int16_t samples_ref[SAMPLES+ALIGN]; |
393 int16_t samples_orig[SAMPLES]; | 394 int16_t samples_orig[SAMPLES+ALIGN]; |
394 uint32_t volumes[CHANNELS + PADDING]; | 395 uint32_t volumes[CHANNELS + PADDING]; |
395 unsigned i, padding; | 396 unsigned i, padding; |
396 pa_usec_t start, stop; | 397 pa_usec_t start, stop; |
397 | 398 |
398 pa_log_debug("checking NEON volume_s16ne(%d)", SAMPLES); | 399 pa_log_debug("checking NEON volume_s16ne(%d)", SAMPLES); |
406 for (i = 0; i < CHANNELS; i++) | 407 for (i = 0; i < CHANNELS; i++) |
407 volumes[i] = PA_CLAMP_VOLUME(rand() >> 15); | 408 volumes[i] = PA_CLAMP_VOLUME(rand() >> 15); |
408 for (padding = 0; padding < PADDING; padding++, i++) | 409 for (padding = 0; padding < PADDING; padding++, i++) |
409 volumes[i] = volumes[padding]; | 410 volumes[i] = volumes[padding]; |
410 | 411 |
411 pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); | 412 pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); |
412 pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref)); | 413 pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, sizeof(samples_ref)); |
413 | 414 |
414 for (i = 0; i < SAMPLES; i++) { | 415 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
415 if (abs(samples[i] - samples_ref[i]) > 0) { | 416 if (abs(samples[i] - samples_ref[i]) > 0) { |
416 pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i], | 417 pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i], |
417 samples_orig[i]); | 418 samples_orig[i]); |
418 } | 419 } |
419 } | 420 } |
420 exit(0); | 421 |
421 start = pa_rtclock_now(); | 422 start = pa_rtclock_now(); |
422 for (i = 0; i < TIMES; i++) { | 423 for (i = 0; i < TIMES; i++) { |
423 memcpy(samples, samples_orig, sizeof(samples_orig)); | 424 memcpy(samples, samples_orig, sizeof(samples_orig)); |
424 pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); | 425 pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); |
425 } | 426 } |
426 stop = pa_rtclock_now(); | 427 stop = pa_rtclock_now(); |
427 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 428 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
428 | 429 |
429 start = pa_rtclock_now(); | 430 start = pa_rtclock_now(); |
430 for (i = 0; i < TIMES; i++) { | 431 for (i = 0; i < TIMES; i++) { |
431 memcpy(samples, samples_orig, sizeof(samples_orig)); | 432 memcpy(samples, samples_orig, sizeof(samples_orig)); |
432 pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples)); | 433 pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); |
433 } | 434 } |
434 stop = pa_rtclock_now(); | 435 stop = pa_rtclock_now(); |
435 pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start)); | 436 pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start)); |
436 | 437 |
437 start = pa_rtclock_now(); | 438 start = pa_rtclock_now(); |