Mercurial > hg > pa-neon
comparison sconv_neon.c @ 5:07763f536182 default tip
ALIGNment support
| author | Peter Meerwald <p.meerwald@bct-electronic.com> |
|---|---|
| date | Sun, 08 Jul 2012 21:48:08 +0200 |
| parents | e889fd0e7769 |
| children |
Comparison legend: equal · deleted · inserted · replaced
| 4:1f6289166006 | 5:07763f536182 |
|---|---|
| 107 "mov %[n], %[n], lsr #2\n\t" | 107 "mov %[n], %[n], lsr #2\n\t" |
| 108 "vdup.f32 q1, %[invscale]\n\t" | 108 "vdup.f32 q1, %[invscale]\n\t" |
| 109 "1:\n\t" | 109 "1:\n\t" |
| 110 "vld1.16 {d0}, [%[src]]!\n\t" | 110 "vld1.16 {d0}, [%[src]]!\n\t" |
| 111 "vmovl.s16 q0, d0\n\t" | 111 "vmovl.s16 q0, d0\n\t" |
| 112 | 112 |
| 113 "vcvt.f32.s32 q0, q0\n\t" | 113 "vcvt.f32.s32 q0, q0\n\t" |
| 114 "vmul.f32 q0, q0, q1\n\t" | 114 "vmul.f32 q0, q0, q1\n\t" |
| 115 | 115 |
| 116 "subs %[n], %[n], #1\n\t" | 116 "subs %[n], %[n], #1\n\t" |
| 117 "vst1.32 {q0}, [%[dst]]!\n\t" | 117 "vst1.32 {q0}, [%[dst]]!\n\t" |
| 128 } | 128 } |
| 129 } | 129 } |
| 130 | 130 |
| 131 #define SAMPLES 1019 | 131 #define SAMPLES 1019 |
| 132 #define TIMES 100000 | 132 #define TIMES 100000 |
| | 133 #define ALIGN 1 |
| 133 | 134 |
| 134 static void run_test_from(void) { | 135 static void run_test_from(void) { |
| 135 int16_t samples[SAMPLES]; | 136 int16_t samples[SAMPLES+ALIGN]; |
| 136 int16_t samples_ref[SAMPLES]; | 137 int16_t samples_ref[SAMPLES+ALIGN]; |
| 137 float floats[SAMPLES]; | 138 float floats[SAMPLES+ALIGN]; |
| 138 int i; | 139 int i; |
| 139 pa_usec_t start, stop; | 140 pa_usec_t start, stop; |
| 140 | 141 |
| 141 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); | 142 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); |
| 142 | 143 |
| 143 memset(samples_ref, 0, sizeof(samples_ref)); | 144 memset(samples_ref, 0, sizeof(samples_ref)); |
| 144 memset(samples, 0, sizeof(samples)); | 145 memset(samples, 0, sizeof(samples)); |
| 145 | 146 |
| 146 for (i = 0; i < SAMPLES; i++) { | 147 for (i = 0; i < SAMPLES+ALIGN; i++) { |
| 147 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); | 148 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); |
| 148 } | 149 } |
| 149 | 150 |
| 150 pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); | 151 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
| 151 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | 152 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
| 152 | 153 |
| 153 for (i = 0; i < SAMPLES; i++) { | 154 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
| 154 if (abs(samples[i] - samples_ref[i]) > 0) { | 155 if (abs(samples[i] - samples_ref[i]) > 0) { |
| 155 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], | 156 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], |
| 156 floats[i]); | 157 floats[i]); |
| 157 } | 158 } |
| 158 } | 159 } |
| 159 | 160 |
| 160 start = pa_rtclock_now(); | 161 start = pa_rtclock_now(); |
| 161 for (i = 0; i < TIMES; i++) { | 162 for (i = 0; i < TIMES; i++) { |
| 162 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | 163 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
| 163 } | 164 } |
| 164 stop = pa_rtclock_now(); | 165 stop = pa_rtclock_now(); |
| 165 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 166 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
| 166 | 167 |
| 167 start = pa_rtclock_now(); | 168 start = pa_rtclock_now(); |
| 168 for (i = 0; i < TIMES; i++) { | 169 for (i = 0; i < TIMES; i++) { |
| 169 pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); | 170 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
| 170 } | 171 } |
| 171 stop = pa_rtclock_now(); | 172 stop = pa_rtclock_now(); |
| 172 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 173 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); |
| 173 } | 174 } |
| 174 | 175 |
| 175 static void run_test_to(void) { | 176 static void run_test_to(void) { |
| 176 int16_t samples[SAMPLES]; | 177 int16_t samples[SAMPLES+ALIGN]; |
| 177 float floats[SAMPLES]; | 178 float floats[SAMPLES+ALIGN]; |
| 178 float floats_ref[SAMPLES]; | 179 float floats_ref[SAMPLES+ALIGN]; |
| 179 int i; | 180 int i; |
| 180 pa_usec_t start, stop; | 181 pa_usec_t start, stop; |
| 181 pa_convert_func_t func; | |
| 182 | 182 |
| 183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); | 183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); |
| 184 | 184 |
| 185 memset(floats_ref, 0, sizeof(floats_ref)); | 185 memset(floats_ref, 0, sizeof(floats_ref)); |
| 186 memset(floats, 0, sizeof(float)); | 186 memset(floats, 0, sizeof(float)); |
| 187 | 187 |
| 188 for (i = 0; i < SAMPLES; i++) { | 188 for (i = 0; i < SAMPLES+ALIGN; i++) { |
| 189 samples[i] = rand() - RAND_MAX/2; | 189 samples[i] = rand() - RAND_MAX/2; |
| 190 } | 190 } |
| 191 | 191 |
| 192 func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; | 192 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
| 193 func(SAMPLES, samples, floats_ref); | 193 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
| 194 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | 194 |
| 195 | 195 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
| 196 for (i = 0; i < SAMPLES; i++) { | |
| 197 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { | 196 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { |
| 198 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], | 197 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], |
| 199 samples[i]); | 198 samples[i]); |
| 200 } | 199 } |
| 201 } | 200 } |
| 202 | 201 |
| 203 start = pa_rtclock_now(); | 202 start = pa_rtclock_now(); |
| 204 for (i = 0; i < TIMES; i++) { | 203 for (i = 0; i < TIMES; i++) { |
| 205 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | 204 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
| 206 } | 205 } |
| 207 stop = pa_rtclock_now(); | 206 stop = pa_rtclock_now(); |
| 208 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 207 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
| 209 | 208 |
| 210 start = pa_rtclock_now(); | 209 start = pa_rtclock_now(); |
| 211 for (i = 0; i < TIMES; i++) { | 210 for (i = 0; i < TIMES; i++) { |
| 212 func(SAMPLES, samples, floats_ref); | 211 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
| 213 } | 212 } |
| 214 stop = pa_rtclock_now(); | 213 stop = pa_rtclock_now(); |
| 215 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 214 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); |
| 216 } | 215 } |
| 217 | 216 |
