comparison svolume_neon.c @ 5:07763f536182 default tip

ALIGNment support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Sun, 08 Jul 2012 21:48:08 +0200
parents 1f6289166006
children
comparison
equal deleted inserted replaced
4:1f6289166006 5:07763f536182
338 338
339 #define SAMPLES 1019 339 #define SAMPLES 1019
340 #define TIMES 50000 340 #define TIMES 50000
341 #define CHANNELS 4 341 #define CHANNELS 4
342 #define PADDING 16 342 #define PADDING 16
343 #define ALIGN 1
343 344
344 static void run_test_float(void) { 345 static void run_test_float(void) {
345 float floats[SAMPLES]; 346 float floats[SAMPLES+ALIGN];
346 float floats_ref[SAMPLES]; 347 float floats_ref[SAMPLES+ALIGN];
347 float floats_orig[SAMPLES]; 348 float floats_orig[SAMPLES+ALIGN];
348 float volumes[CHANNELS]; 349 float volumes[CHANNELS];
349 unsigned i; 350 unsigned i;
350 pa_usec_t start, stop; 351 pa_usec_t start, stop;
351 352
352 pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES); 353 pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES);
353 354
354 for (i = 0; i < SAMPLES; i++) { 355 for (i = 0; i < SAMPLES+ALIGN; i++) {
355 floats_orig[i] = rand()/(float) RAND_MAX - 0.5f; 356 floats_orig[i] = rand()/(float) RAND_MAX - 0.5f;
356 } 357 }
357 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); 358 memcpy(floats_ref, floats_orig, sizeof(floats_orig));
358 memcpy(floats, floats_orig, sizeof(floats_orig)); 359 memcpy(floats, floats_orig, sizeof(floats_orig));
359 360
360 for (i = 0; i < CHANNELS; i++) 361 for (i = 0; i < CHANNELS; i++)
361 volumes[i] = 0.5f * rand() / (float) RAND_MAX; 362 volumes[i] = 0.5f * rand() / (float) RAND_MAX;
362 363
363 pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); 364 pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats));
364 pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); 365 pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref));
365 366
366 for (i = 0; i < SAMPLES; i++) { 367 for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
367 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { 368 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
368 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i], 369 pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i],
369 floats_orig[i]); 370 floats_orig[i]);
370 } 371 }
371 } 372 }
372 373
373 start = pa_rtclock_now(); 374 start = pa_rtclock_now();
374 for (i = 0; i < TIMES; i++) { 375 for (i = 0; i < TIMES; i++) {
375 memcpy(floats, floats_orig, sizeof(floats_orig)); 376 memcpy(floats, floats_orig, sizeof(floats_orig));
376 pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats)); 377 pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, sizeof(floats));
377 } 378 }
378 stop = pa_rtclock_now(); 379 stop = pa_rtclock_now();
379 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 380 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
380 381
381 start = pa_rtclock_now(); 382 start = pa_rtclock_now();
382 for (i = 0; i < TIMES; i++) { 383 for (i = 0; i < TIMES; i++) {
383 memcpy(floats_ref, floats_orig, sizeof(floats_orig)); 384 memcpy(floats_ref, floats_orig, sizeof(floats_orig));
384 pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref)); 385 pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, sizeof(floats_ref));
385 } 386 }
386 stop = pa_rtclock_now(); 387 stop = pa_rtclock_now();
387 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 388 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
388 } 389 }
389 390
390 static void run_test_s16(void) { 391 static void run_test_s16(void) {
391 int16_t samples[SAMPLES]; 392 int16_t samples[SAMPLES+ALIGN];
392 int16_t samples_ref[SAMPLES]; 393 int16_t samples_ref[SAMPLES+ALIGN];
393 int16_t samples_orig[SAMPLES]; 394 int16_t samples_orig[SAMPLES+ALIGN];
394 uint32_t volumes[CHANNELS + PADDING]; 395 uint32_t volumes[CHANNELS + PADDING];
395 unsigned i, padding; 396 unsigned i, padding;
396 pa_usec_t start, stop; 397 pa_usec_t start, stop;
397 398
398 pa_log_debug("checking NEON volume_s16ne(%d)", SAMPLES); 399 pa_log_debug("checking NEON volume_s16ne(%d)", SAMPLES);
406 for (i = 0; i < CHANNELS; i++) 407 for (i = 0; i < CHANNELS; i++)
407 volumes[i] = PA_CLAMP_VOLUME(rand() >> 15); 408 volumes[i] = PA_CLAMP_VOLUME(rand() >> 15);
408 for (padding = 0; padding < PADDING; padding++, i++) 409 for (padding = 0; padding < PADDING; padding++, i++)
409 volumes[i] = volumes[padding]; 410 volumes[i] = volumes[padding];
410 411
411 pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); 412 pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples));
412 pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref)); 413 pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, sizeof(samples_ref));
413 414
414 for (i = 0; i < SAMPLES; i++) { 415 for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
415 if (abs(samples[i] - samples_ref[i]) > 0) { 416 if (abs(samples[i] - samples_ref[i]) > 0) {
416 pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i], 417 pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i],
417 samples_orig[i]); 418 samples_orig[i]);
418 } 419 }
419 } 420 }
420 exit(0); 421
421 start = pa_rtclock_now(); 422 start = pa_rtclock_now();
422 for (i = 0; i < TIMES; i++) { 423 for (i = 0; i < TIMES; i++) {
423 memcpy(samples, samples_orig, sizeof(samples_orig)); 424 memcpy(samples, samples_orig, sizeof(samples_orig));
424 pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples)); 425 pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, sizeof(samples));
425 } 426 }
426 stop = pa_rtclock_now(); 427 stop = pa_rtclock_now();
427 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 428 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
428 429
429 start = pa_rtclock_now(); 430 start = pa_rtclock_now();
430 for (i = 0; i < TIMES; i++) { 431 for (i = 0; i < TIMES; i++) {
431 memcpy(samples, samples_orig, sizeof(samples_orig)); 432 memcpy(samples, samples_orig, sizeof(samples_orig));
432 pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples)); 433 pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, sizeof(samples));
433 } 434 }
434 stop = pa_rtclock_now(); 435 stop = pa_rtclock_now();
435 pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start)); 436 pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start));
436 437
437 start = pa_rtclock_now(); 438 start = pa_rtclock_now();

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.