diff remap_neon.c @ 5:07763f536182 default tip

ALIGNment support: add an ALIGN constant and offset every test buffer by ALIGN elements
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Sun, 08 Jul 2012 21:48:08 +0200
parents e889fd0e7769
children
line wrap: on
line diff
--- a/remap_neon.c	Sun Jul 08 21:03:41 2012 +0200
+++ b/remap_neon.c	Sun Jul 08 21:48:08 2012 +0200
@@ -239,7 +239,7 @@
 
 static void mono_to_stereo_float_neon_a8(float *dst, const float *src, unsigned n) {
     int i = n & 3;
-    
+
     asm volatile (
     "mov        %[n], %[n], lsr #2\n\t"
     "1:\n\t"
@@ -288,7 +288,7 @@
 
 static void mono_to_stereo_int16_neon(int16_t *dst, const int16_t *src, unsigned n) {
     int i = n & 7;
-    
+
     asm volatile (
     "mov        %[n], %[n], lsr #3\n\t"
     "1:\n\t"
@@ -298,7 +298,7 @@
     "vst2.16	{q0,q1}, [%[dst]]!\n\t"
     "bgt	    1b\n\t"
       // output operands (or input operands that get modified)
-    : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n) 
+    : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (n)
     : // input operands
     : "memory", "cc" // clobber list
     );
@@ -399,13 +399,14 @@
 
 #define SAMPLES 1019
 #define TIMES 500000
+#define ALIGN 1
 
 static void run_test_mono_to_stereo_float(void) {
-    float stereo_a9[2*SAMPLES];
-    float stereo_a8[2*SAMPLES];
-    float stereo_ref[2*SAMPLES];
-    float stereo_gen[2*SAMPLES];
-    float mono[SAMPLES]; 
+    float stereo_a9[2*SAMPLES+ALIGN];
+    float stereo_a8[2*SAMPLES+ALIGN];
+    float stereo_ref[2*SAMPLES+ALIGN];
+    float stereo_gen[2*SAMPLES+ALIGN];
+    float mono[SAMPLES+ALIGN];
     int i;
     pa_usec_t start, stop;
     pa_sample_format_t sf;
@@ -419,7 +420,7 @@
     memset(stereo_a9, 0, sizeof(stereo_a9));
     memset(stereo_a8, 0, sizeof(stereo_a8));
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = 0; i < SAMPLES+ALIGN; i++) {
         mono[i] = rand()/(float) RAND_MAX - 0.5f;
     }
 
@@ -434,24 +435,24 @@
     remap.map_table_f[0][0] = 1.0;
     remap.map_table_f[1][0] = 1.0;
 
-    remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
-    remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
-    remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
-    remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+    remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
+    remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
+    remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
+    remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
 
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
         if (fabsf(stereo_a9[i] - stereo_ref[i]) > 0.00001) {
             pa_log_debug("NEON/A9 %d: %.3f != %.3f (%.3f)", i, stereo_a9[i], stereo_ref[i],
                       mono[i/2]);
         }
     }
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
         if (fabsf(stereo_a8[i] - stereo_ref[i]) > 0.00001) {
             pa_log_debug("NEON/A8 %d: %.3f != %.3f (%.3f)", i, stereo_a8[i], stereo_ref[i],
                       mono[i/2]);
         }
     }
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
         if (fabsf(stereo_gen[i] - stereo_ref[i]) > 0.00001) {
             pa_log_debug("generic %d: %.3f != %.3f (%.3f)", i, stereo_gen[i], stereo_ref[i],
                       mono[i/2]);
@@ -460,38 +461,38 @@
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+        remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
+        remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
+        remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+        remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
 }
 
 static void run_test_stereo_to_mono_float(void) {
-    float stereo[2*SAMPLES];
-    float mono_ref[SAMPLES];
-    float mono_gen[SAMPLES];
-    float mono[SAMPLES];
+    float stereo[2*SAMPLES+ALIGN];
+    float mono_ref[SAMPLES+ALIGN];
+    float mono_gen[SAMPLES+ALIGN];
+    float mono[SAMPLES+ALIGN];
     int i;
     pa_usec_t start, stop;
     pa_sample_format_t sf;
@@ -503,7 +504,7 @@
     memset(mono_ref, 0, sizeof(mono_ref));
     memset(mono, 0, sizeof(mono));
 
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = 0; i < 2*SAMPLES+ALIGN; i++) {
         stereo[i] = rand()/(float) RAND_MAX - 0.5f;
     }
 
@@ -518,11 +519,11 @@
     remap.map_table_f[0][0] = 1.0;
     remap.map_table_f[0][1] = 1.0;
 
-    remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
-    remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
-    remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+    remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
+    remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
+    remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (fabsf(mono[i] - mono_ref[i]) > 0.00001) {
             pa_log_debug("%d: %.3f != %.3f (%.3f %0.3f)", i, mono[i], mono_ref[i],
                       stereo[2*i+0], stereo[2*i+1]);
@@ -531,32 +532,32 @@
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+        remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+        remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+        remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
 }
 
 static void run_test_mono_to_stereo_s16(void) {
-    int16_t stereo_a9[2*SAMPLES];
-    int16_t stereo_a8[2*SAMPLES];
-    int16_t stereo_ref[2*SAMPLES];
-    int16_t stereo_gen[2*SAMPLES];
-    int16_t mono[SAMPLES];
+    int16_t stereo_a9[2*SAMPLES+ALIGN];
+    int16_t stereo_a8[2*SAMPLES+ALIGN];
+    int16_t stereo_ref[2*SAMPLES+ALIGN];
+    int16_t stereo_gen[2*SAMPLES+ALIGN];
+    int16_t mono[SAMPLES+ALIGN];
     int i;
     pa_usec_t start, stop;
     pa_sample_format_t sf;
@@ -570,7 +571,7 @@
     memset(stereo_a8, 0, sizeof(stereo_a8));
     memset(stereo_gen, 0, sizeof(stereo_gen));
 
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = 0; i < SAMPLES+ALIGN; i++) {
         mono[i] = rand() - RAND_MAX/2;
     }
 
@@ -584,26 +585,26 @@
     remap.o_ss = &oss;
     remap.map_table_i[0][0] = 0x10000;
     remap.map_table_i[1][0] = 0x10000;
-    
-    remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
-    remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
-    remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
-    remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
-    
-    for (i = 0; i < 2*SAMPLES; i++) {
+
+    remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
+    remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
+    remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
+    remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
+
+    for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
         if (abs(stereo_a9[i] - stereo_ref[i]) > 0) {
             pa_log_debug("NEON/A9 %d: %d != %d (%d)", i, stereo_a9[i], stereo_ref[i],
                       mono[i/2]);
         }
     }
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
         if (abs(stereo_a8[i] - stereo_ref[i]) > 0) {
             pa_log_debug("NEON/A8 %d: %d != %d (%d)", i, stereo_a8[i], stereo_ref[i],
                       mono[i/2]);
         }
     }
 
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = ALIGN; i < 2*SAMPLES+ALIGN; i++) {
         if (abs(stereo_gen[i] - stereo_ref[i]) > 0) {
             pa_log_debug("generic %d: %d != %d (%d)", i, stereo_gen[i], stereo_ref[i],
                       mono[i/2]);
@@ -612,38 +613,38 @@
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_mono_to_stereo_neon_a9(&remap, stereo_a9, mono, SAMPLES);
+        remap_mono_to_stereo_neon_a9(&remap, stereo_a9+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON/A9:\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_mono_to_stereo_neon_a8(&remap, stereo_a8, mono, SAMPLES);
+        remap_mono_to_stereo_neon_a8(&remap, stereo_a8+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON/A8:\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_mono_to_stereo_c(&remap, stereo_ref, mono, SAMPLES);
+        remap_mono_to_stereo_c(&remap, stereo_ref+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_channels_matrix_c(&remap, stereo_gen, mono, SAMPLES);
+        remap_channels_matrix_c(&remap, stereo_gen+ALIGN, mono+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));
 }
 
 static void run_test_stereo_to_mono_s16(void) {
-    int16_t stereo[2*SAMPLES];
-    int16_t mono_ref[SAMPLES];
-    int16_t mono_gen[SAMPLES];
-    int16_t mono[SAMPLES];
+    int16_t stereo[2*SAMPLES+ALIGN];
+    int16_t mono_ref[SAMPLES+ALIGN];
+    int16_t mono_gen[SAMPLES+ALIGN];
+    int16_t mono[SAMPLES+ALIGN];
     int i;
     pa_usec_t start, stop;
     pa_sample_format_t sf;
@@ -656,7 +657,7 @@
     memset(mono_gen, 0, sizeof(mono_gen));
     memset(mono, 0, sizeof(mono));
 
-    for (i = 0; i < 2*SAMPLES; i++) {
+    for (i = 0; i < 2*SAMPLES+ALIGN; i++) {
         stereo[i] = rand() - RAND_MAX/2;
     }
 
@@ -670,18 +671,18 @@
     remap.o_ss = &oss;
     remap.map_table_i[0][0] = 0x10000;
     remap.map_table_i[0][1] = 0x10000;
-    
-    remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
-    remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
-    remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
 
-    for (i = 0; i < SAMPLES; i++) {
+    remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
+    remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
+    remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
+
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (abs(mono[i] - mono_ref[i]) > 0) {
             pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_ref[i],
                       stereo[2*i+0], stereo[2*i+1]);
         }
     }
-    for (i = 0; i < SAMPLES; i++) {
+    for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
         if (abs(mono[i] - mono_gen[i]) > 0) {
             pa_log_debug("%d: %d != %d (%d)", i, mono[i], mono_gen[i],
                       stereo[2*i+0], stereo[2*i+1]);
@@ -690,21 +691,21 @@
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_stereo_to_mono_neon(&remap, mono, stereo, SAMPLES);
+        remap_stereo_to_mono_neon(&remap, mono+ALIGN, stereo+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("NEON:\t\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_stereo_to_mono_c(&remap, mono_ref, stereo, SAMPLES);
+        remap_stereo_to_mono_c(&remap, mono_ref+ALIGN, stereo+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("ref:\t\t%llu usec.", (long long unsigned int)(stop - start));
 
     start = pa_rtclock_now();
     for (i = 0; i < TIMES; i++) {
-        remap_channels_matrix_c(&remap, mono_gen, stereo, SAMPLES);
+        remap_channels_matrix_c(&remap, mono_gen+ALIGN, stereo+ALIGN, SAMPLES);
     }
     stop = pa_rtclock_now();
     pa_log_info("generic:\t%llu usec.", (long long unsigned int)(stop - start));

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.