diff svolume_neon.c @ 5:07763f536182 default tip

ALIGNment support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Sun, 08 Jul 2012 21:48:08 +0200
parents 1f6289166006
children
line wrap: on
line diff
--- a/svolume_neon.c	Sun Jul 08 21:03:41 2012 +0200
+++ b/svolume_neon.c	Sun Jul 08 21:48:08 2012 +0200
@@ -340,18 +340,19 @@
 #define TIMES 50000
 #define CHANNELS 4
 #define PADDING 16
+#define ALIGN 1 /* offset, in elements, added to the test buffer pointers */
 
 static void run_test_float(void) {
-    float floats[SAMPLES];
-    float floats_ref[SAMPLES];
-    float floats_orig[SAMPLES];
+    float floats[SAMPLES+ALIGN];
+    float floats_ref[SAMPLES+ALIGN];
+    float floats_orig[SAMPLES+ALIGN];
     float volumes[CHANNELS];
     unsigned i;
     pa_usec_t start, stop;
 
     pa_log_debug("checking NEON volume_float32ne(%d)", SAMPLES);
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = 0; i < SAMPLES+ALIGN; i++) {
         floats_orig[i] = rand()/(float) RAND_MAX - 0.5f;
     }
     memcpy(floats_ref, floats_orig, sizeof(floats_orig));
@@ -360,10 +361,10 @@
     for (i = 0; i < CHANNELS; i++)
         volumes[i] = 0.5f * rand() / (float) RAND_MAX;
 
-    pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats));
-    pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref));
+    pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float)); /* only SAMPLES elements remain after the ALIGN offset */
+    pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float));
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
             pa_log_debug("%d: %.3f != %.3f (%.3f)", i, floats[i], floats_ref[i],
                       floats_orig[i]);
@@ -373,7 +374,7 @@
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(floats, floats_orig, sizeof(floats_orig));
-        pa_volume_float32ne_neon(floats, volumes, CHANNELS, sizeof(floats));
+        pa_volume_float32ne_neon(floats+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float)); /* only SAMPLES elements remain after the ALIGN offset */
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
@@ -381,16 +382,16 @@
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(floats_ref, floats_orig, sizeof(floats_orig));
-        pa_volume_float32ne_c(floats_ref, volumes, CHANNELS, sizeof(floats_ref));
+        pa_volume_float32ne_c(floats_ref+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(float)); /* only SAMPLES elements remain after the ALIGN offset */
     }
     stop = pa_rtclock_now();
     pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
 }
 
 static void run_test_s16(void) {
-    int16_t samples[SAMPLES];
-    int16_t samples_ref[SAMPLES];
-    int16_t samples_orig[SAMPLES];
+    int16_t samples[SAMPLES+ALIGN];
+    int16_t samples_ref[SAMPLES+ALIGN];
+    int16_t samples_orig[SAMPLES+ALIGN];
     uint32_t volumes[CHANNELS + PADDING];
     unsigned i, padding;
     pa_usec_t start, stop;
@@ -408,20 +409,20 @@
     for (padding = 0; padding < PADDING; padding++, i++)
         volumes[i] = volumes[padding];
 
-    pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples));
-    pa_volume_s16ne_c(samples_ref, volumes, CHANNELS, sizeof(samples_ref));
+    pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t)); /* only SAMPLES elements remain after the ALIGN offset */
+    pa_volume_s16ne_c(samples_ref+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t));
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (abs(samples[i] - samples_ref[i]) > 0) {
             pa_log_debug("%d: %d != %d (%d)", i, samples[i], samples_ref[i],
                       samples_orig[i]);
         }
     }
-exit(0);
+
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(samples, samples_orig, sizeof(samples_orig));
-        pa_volume_s16ne_neon(samples, volumes, CHANNELS, sizeof(samples));
+        pa_volume_s16ne_neon(samples+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t));
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
@@ -429,7 +430,7 @@
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
         memcpy(samples, samples_orig, sizeof(samples_orig));
-        pa_volume_s16ne_arm(samples, volumes, CHANNELS, sizeof(samples));
+        pa_volume_s16ne_arm(samples+ALIGN, volumes, CHANNELS, SAMPLES * sizeof(int16_t)); /* only SAMPLES elements remain after the ALIGN offset */
     }
     stop = pa_rtclock_now();
     pa_log_info("ARM: %llu usec.", (long long unsigned int)(stop - start));

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.