Mercurial > hg > pa-neon
comparison sconv_neon.c @ 5:07763f536182 default tip
ALIGNment support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Sun, 08 Jul 2012 21:48:08 +0200 |
parents | e889fd0e7769 |
children |
comparison
equal
deleted
inserted
replaced
4:1f6289166006 | 5:07763f536182 |
---|---|
107 "mov %[n], %[n], lsr #2\n\t" | 107 "mov %[n], %[n], lsr #2\n\t" |
108 "vdup.f32 q1, %[invscale]\n\t" | 108 "vdup.f32 q1, %[invscale]\n\t" |
109 "1:\n\t" | 109 "1:\n\t" |
110 "vld1.16 {d0}, [%[src]]!\n\t" | 110 "vld1.16 {d0}, [%[src]]!\n\t" |
111 "vmovl.s16 q0, d0\n\t" | 111 "vmovl.s16 q0, d0\n\t" |
112 | 112 |
113 "vcvt.f32.s32 q0, q0\n\t" | 113 "vcvt.f32.s32 q0, q0\n\t" |
114 "vmul.f32 q0, q0, q1\n\t" | 114 "vmul.f32 q0, q0, q1\n\t" |
115 | 115 |
116 "subs %[n], %[n], #1\n\t" | 116 "subs %[n], %[n], #1\n\t" |
117 "vst1.32 {q0}, [%[dst]]!\n\t" | 117 "vst1.32 {q0}, [%[dst]]!\n\t" |
128 } | 128 } |
129 } | 129 } |
130 | 130 |
131 #define SAMPLES 1019 | 131 #define SAMPLES 1019 |
132 #define TIMES 100000 | 132 #define TIMES 100000 |
133 #define ALIGN 1 | |
133 | 134 |
134 static void run_test_from(void) { | 135 static void run_test_from(void) { |
135 int16_t samples[SAMPLES]; | 136 int16_t samples[SAMPLES+ALIGN]; |
136 int16_t samples_ref[SAMPLES]; | 137 int16_t samples_ref[SAMPLES+ALIGN]; |
137 float floats[SAMPLES]; | 138 float floats[SAMPLES+ALIGN]; |
138 int i; | 139 int i; |
139 pa_usec_t start, stop; | 140 pa_usec_t start, stop; |
140 | 141 |
141 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); | 142 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); |
142 | 143 |
143 memset(samples_ref, 0, sizeof(samples_ref)); | 144 memset(samples_ref, 0, sizeof(samples_ref)); |
144 memset(samples, 0, sizeof(samples)); | 145 memset(samples, 0, sizeof(samples)); |
145 | 146 |
146 for (i = 0; i < SAMPLES; i++) { | 147 for (i = 0; i < SAMPLES+ALIGN; i++) { |
147 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); | 148 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); |
148 } | 149 } |
149 | 150 |
150 pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); | 151 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
151 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | 152 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
152 | 153 |
153 for (i = 0; i < SAMPLES; i++) { | 154 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
154 if (abs(samples[i] - samples_ref[i]) > 0) { | 155 if (abs(samples[i] - samples_ref[i]) > 0) { |
155 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], | 156 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], |
156 floats[i]); | 157 floats[i]); |
157 } | 158 } |
158 } | 159 } |
159 | 160 |
160 start = pa_rtclock_now(); | 161 start = pa_rtclock_now(); |
161 for (i = 0; i < TIMES; i++) { | 162 for (i = 0; i < TIMES; i++) { |
162 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | 163 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
163 } | 164 } |
164 stop = pa_rtclock_now(); | 165 stop = pa_rtclock_now(); |
165 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 166 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
166 | 167 |
167 start = pa_rtclock_now(); | 168 start = pa_rtclock_now(); |
168 for (i = 0; i < TIMES; i++) { | 169 for (i = 0; i < TIMES; i++) { |
169 pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); | 170 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
170 } | 171 } |
171 stop = pa_rtclock_now(); | 172 stop = pa_rtclock_now(); |
172 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 173 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); |
173 } | 174 } |
174 | 175 |
175 static void run_test_to(void) { | 176 static void run_test_to(void) { |
176 int16_t samples[SAMPLES]; | 177 int16_t samples[SAMPLES+ALIGN]; |
177 float floats[SAMPLES]; | 178 float floats[SAMPLES+ALIGN]; |
178 float floats_ref[SAMPLES]; | 179 float floats_ref[SAMPLES+ALIGN]; |
179 int i; | 180 int i; |
180 pa_usec_t start, stop; | 181 pa_usec_t start, stop; |
181 pa_convert_func_t func; | |
182 | 182 |
183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); | 183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); |
184 | 184 |
185 memset(floats_ref, 0, sizeof(floats_ref)); | 185 memset(floats_ref, 0, sizeof(floats_ref)); |
186 memset(floats, 0, sizeof(float)); | 186 memset(floats, 0, sizeof(float)); |
187 | 187 |
188 for (i = 0; i < SAMPLES; i++) { | 188 for (i = 0; i < SAMPLES+ALIGN; i++) { |
189 samples[i] = rand() - RAND_MAX/2; | 189 samples[i] = rand() - RAND_MAX/2; |
190 } | 190 } |
191 | 191 |
192 func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; | 192 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
193 func(SAMPLES, samples, floats_ref); | 193 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
194 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | 194 |
195 | 195 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
196 for (i = 0; i < SAMPLES; i++) { | |
197 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { | 196 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { |
198 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], | 197 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], |
199 samples[i]); | 198 samples[i]); |
200 } | 199 } |
201 } | 200 } |
202 | 201 |
203 start = pa_rtclock_now(); | 202 start = pa_rtclock_now(); |
204 for (i = 0; i < TIMES; i++) { | 203 for (i = 0; i < TIMES; i++) { |
205 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | 204 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
206 } | 205 } |
207 stop = pa_rtclock_now(); | 206 stop = pa_rtclock_now(); |
208 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | 207 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); |
209 | 208 |
210 start = pa_rtclock_now(); | 209 start = pa_rtclock_now(); |
211 for (i = 0; i < TIMES; i++) { | 210 for (i = 0; i < TIMES; i++) { |
212 func(SAMPLES, samples, floats_ref); | 211 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
213 } | 212 } |
214 stop = pa_rtclock_now(); | 213 stop = pa_rtclock_now(); |
215 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | 214 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); |
216 } | 215 } |
217 | 216 |