Mercurial > hg > pa-neon
comparison svolume_neon.c @ 5:07763f536182 default tip
ALIGNment support
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Sun, 08 Jul 2012 21:48:08 +0200 |
| parents | 1f6289166006 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:1f6289166006 | 5:07763f536182 |
|---|---|
| 338 | 338 |
| 339 #define SAMPLES 1019 | 339 #define SAMPLES 1019 |
| 340 #define TIMES 50000 | 340 #define TIMES 50000 |
| 341 #define CHANNELS 4 | 341 #define CHANNELS 4 |
| 342 #define PADDING 16 | 342 #define PADDING 16 |
| | 343 #define ALIGN 1 |
| 343 | 344 |
| 344 static void run_test_float(void) { | 345 static void run_test_float(void) { |
| 345 float floats[SAMPLES]; | 346 float floats[SAMPLES+ALIGN]; |
| 346 float floats_ref[SAMPLES]; | 347 float floats_ref[SAMPLES+ALIGN]; |
| 347 float floats_orig[SAMPLES]; | 348 float floats_orig[SAMPLES+ALIGN]; |
| 348 float volumes[CHANNELS]; | 349 float volumes[CHANNELS]; |
| 349 unsigned i; | 350 unsigned i; |
| 350 pa_usec_t start, stop; | 351 pa_usec_t start, stop; |
| 351 | 352 |
| 352 pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES); | 353 pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES); |
| 353 | 354 |
| 354 for (i = 0; i < SAMPLES; i++) { | 355 for (i = 0; i < SAMPLES+ALIGN; i++) { |
| 355 floats_orig[i] = rand()/(float) RAND_MAX - 0.5f; | 356 floats_orig[i] = rand()/(float) RAND_MAX - 0.5f; |
| 356 } | 357 } |
| 357 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); | 358 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); |
| 358 memcpy(floats, floats_orig, sizeof(floats_orig)); | 359 memcpy(floats, floats_orig, sizeof(floats_orig)); |
| 359 | 360 |
| 360 for (i = 0; i < CHANNELS; i++) | 361 for (i = 0; i < CHANNELS; i++) |
| 361 volumes[i] = 0.5f * rand() / (float) RAND_MAX; | 362 volumes[i] = 0.5f * rand() / (float) RAND_MAX; |
| 362 | 363 |
| 363 pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); | 364 pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats)); |
| 364 pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); | 365 pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref)); |
| 365 | 366 |
| 366 for (i = 0; i < SAMPLES; i++) { | 367 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
| 367 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { | 368 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { |
| 368 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i], | 369 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i], |
| 369 floats_orig[i]); | 370 floats_orig[i]); |
| 370 } | 371 } |
| 371 } | 372 } |
| 372 | 373 |
| 373 start = pa_rtclock_now(); | 374 start = pa_rtclock_now(); |
| 374 for (i = 0; i < TIMES; i++) { | 375 for (i = 0; i < TIMES; i++) { |
| 375 memcpy(floats, floats_orig, sizeof(floats_orig)); | 376 memcpy(floats, floats_orig, sizeof(floats_orig)); |
| 376 pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); | 377 pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats)); |
| 377 } | 378 } |
| 378 stop = pa_rtclock_now(); | 379 stop = pa_rtclock_now(); |
| 379 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 380 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
| 380 | 381 |
| 381 start = pa_rtclock_now(); | 382 start = pa_rtclock_now(); |
| 382 for (i = 0; i < TIMES; i++) { | 383 for (i = 0; i < TIMES; i++) { |
| 383 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); | 384 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); |
| 384 pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); | 385 pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref)); |
| 385 } | 386 } |
| 386 stop = pa_rtclock_now(); | 387 stop = pa_rtclock_now(); |
| 387 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 388 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); |
| 388 } | 389 } |
| 389 | 390 |
| 390 static void run_test_s16(void) { | 391 static void run_test_s16(void) { |
| 391 int16_t samples[SAMPLES]; | 392 int16_t samples[SAMPLES+ALIGN]; |
| 392 int16_t samples_ref[SAMPLES]; | 393 int16_t samples_ref[SAMPLES+ALIGN]; |
| 393 int16_t samples_orig[SAMPLES]; | 394 int16_t samples_orig[SAMPLES+ALIGN]; |
| 394 uint32_t volumes[CHANNELS + PADDING]; | 395 uint32_t volumes[CHANNELS + PADDING]; |
| 395 unsigned i, padding; | 396 unsigned i, padding; |
| 396 pa_usec_t start, stop; | 397 pa_usec_t start, stop; |
| 397 | 398 |
| 398 pa_log_debug("checking NEON volume_s16ne(%d)", SAMPLES); | 399 pa_log_debug("checking NEON volume_s16ne(%d)", SAMPLES); |
| 406 for (i = 0; i < CHANNELS; i++) | 407 for (i = 0; i < CHANNELS; i++) |
| 407 volumes[i] = PA_CLAMP_VOLUME(rand() >> 15); | 408 volumes[i] = PA_CLAMP_VOLUME(rand() >> 15); |
| 408 for (padding = 0; padding < PADDING; padding++, i++) | 409 for (padding = 0; padding < PADDING; padding++, i++) |
| 409 volumes[i] = volumes[padding]; | 410 volumes[i] = volumes[padding]; |
| 410 | 411 |
| 411 pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); | 412 pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); |
| 412 pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref)); | 413 pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, sizeof(samples_ref)); |
| 413 | 414 |
| 414 for (i = 0; i < SAMPLES; i++) { | 415 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
| 415 if (abs(samples[i] - samples_ref[i]) > 0) { | 416 if (abs(samples[i] - samples_ref[i]) > 0) { |
| 416 pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i], | 417 pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i], |
| 417 samples_orig[i]); | 418 samples_orig[i]); |
| 418 } | 419 } |
| 419 } | 420 } |
| 420 exit(0); | 421 |
| 421 start = pa_rtclock_now(); | 422 start = pa_rtclock_now(); |
| 422 for (i = 0; i < TIMES; i++) { | 423 for (i = 0; i < TIMES; i++) { |
| 423 memcpy(samples, samples_orig, sizeof(samples_orig)); | 424 memcpy(samples, samples_orig, sizeof(samples_orig)); |
| 424 pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); | 425 pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); |
| 425 } | 426 } |
| 426 stop = pa_rtclock_now(); | 427 stop = pa_rtclock_now(); |
| 427 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 428 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
| 428 | 429 |
| 429 start = pa_rtclock_now(); | 430 start = pa_rtclock_now(); |
| 430 for (i = 0; i < TIMES; i++) { | 431 for (i = 0; i < TIMES; i++) { |
| 431 memcpy(samples, samples_orig, sizeof(samples_orig)); | 432 memcpy(samples, samples_orig, sizeof(samples_orig)); |
| 432 pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples)); | 433 pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, sizeof(samples)); |
| 433 } | 434 } |
| 434 stop = pa_rtclock_now(); | 435 stop = pa_rtclock_now(); |
| 435 pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start)); | 436 pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start)); |
| 436 | 437 |
| 437 start = pa_rtclock_now(); | 438 start = pa_rtclock_now(); |
