comparison sconv_neon.c @ 5:07763f536182 default tip

ALIGNment support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Sun, 08 Jul 2012 21:48:08 +0200
parents e889fd0e7769
children
comparison
Legend: equal | deleted | inserted | replaced
left column (old): 4:1f6289166006 | right column (new): 5:07763f536182
107 "mov %[n], %[n], lsr #2\n\t" 107 "mov %[n], %[n], lsr #2\n\t"
108 "vdup.f32 q1, %[invscale]\n\t" 108 "vdup.f32 q1, %[invscale]\n\t"
109 "1:\n\t" 109 "1:\n\t"
110 "vld1.16 {d0}, [%[src]]!\n\t" 110 "vld1.16 {d0}, [%[src]]!\n\t"
111 "vmovl.s16 q0, d0\n\t" 111 "vmovl.s16 q0, d0\n\t"
112 112
113 "vcvt.f32.s32 q0, q0\n\t" 113 "vcvt.f32.s32 q0, q0\n\t"
114 "vmul.f32 q0, q0, q1\n\t" 114 "vmul.f32 q0, q0, q1\n\t"
115 115
116 "subs %[n], %[n], #1\n\t" 116 "subs %[n], %[n], #1\n\t"
117 "vst1.32 {q0}, [%[dst]]!\n\t" 117 "vst1.32 {q0}, [%[dst]]!\n\t"
128 } 128 }
129 } 129 }
130 130
131 #define SAMPLES 1019 131 #define SAMPLES 1019
132 #define TIMES 100000 132 #define TIMES 100000
133 #define ALIGN 1
133 134
134 static void run_test_from(void) { 135 static void run_test_from(void) {
135 int16_t samples[SAMPLES]; 136 int16_t samples[SAMPLES+ALIGN];
136 int16_t samples_ref[SAMPLES]; 137 int16_t samples_ref[SAMPLES+ALIGN];
137 float floats[SAMPLES]; 138 float floats[SAMPLES+ALIGN];
138 int i; 139 int i;
139 pa_usec_t start, stop; 140 pa_usec_t start, stop;
140 141
141 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); 142 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES);
142 143
143 memset(samples_ref, 0, sizeof(samples_ref)); 144 memset(samples_ref, 0, sizeof(samples_ref));
144 memset(samples, 0, sizeof(samples)); 145 memset(samples, 0, sizeof(samples));
145 146
146 for (i = 0; i < SAMPLES; i++) { 147 for (i = 0; i < SAMPLES+ALIGN; i++) {
147 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); 148 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f);
148 } 149 }
149 150
150 pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); 151 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN);
151 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); 152 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN);
152 153
153 for (i = 0; i < SAMPLES; i++) { 154 for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
154 if (abs(samples[i] - samples_ref[i]) > 0) { 155 if (abs(samples[i] - samples_ref[i]) > 0) {
155 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], 156 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i],
156 floats[i]); 157 floats[i]);
157 } 158 }
158 } 159 }
159 160
160 start = pa_rtclock_now(); 161 start = pa_rtclock_now();
161 for (i = 0; i < TIMES; i++) { 162 for (i = 0; i < TIMES; i++) {
162 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); 163 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN);
163 } 164 }
164 stop = pa_rtclock_now(); 165 stop = pa_rtclock_now();
165 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 166 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
166 167
167 start = pa_rtclock_now(); 168 start = pa_rtclock_now();
168 for (i = 0; i < TIMES; i++) { 169 for (i = 0; i < TIMES; i++) {
169 pa_sconv_s16le_from_float32ne(SAMPLES, floats, samples_ref); 170 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN);
170 } 171 }
171 stop = pa_rtclock_now(); 172 stop = pa_rtclock_now();
172 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 173 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
173 } 174 }
174 175
175 static void run_test_to(void) { 176 static void run_test_to(void) {
176 int16_t samples[SAMPLES]; 177 int16_t samples[SAMPLES+ALIGN];
177 float floats[SAMPLES]; 178 float floats[SAMPLES+ALIGN];
178 float floats_ref[SAMPLES]; 179 float floats_ref[SAMPLES+ALIGN];
179 int i; 180 int i;
180 pa_usec_t start, stop; 181 pa_usec_t start, stop;
181 pa_convert_func_t func;
182 182
183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); 183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES);
184 184
185 memset(floats_ref, 0, sizeof(floats_ref)); 185 memset(floats_ref, 0, sizeof(floats_ref));
186 memset(floats, 0, sizeof(float)); 186 memset(floats, 0, sizeof(float));
187 187
188 for (i = 0; i < SAMPLES; i++) { 188 for (i = 0; i < SAMPLES+ALIGN; i++) {
189 samples[i] = rand() - RAND_MAX/2; 189 samples[i] = rand() - RAND_MAX/2;
190 } 190 }
191 191
192 func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; 192 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN);
193 func(SAMPLES, samples, floats_ref); 193 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN);
194 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); 194
195 195 for (i = ALIGN; i < SAMPLES+ALIGN; i++) {
196 for (i = 0; i < SAMPLES; i++) {
197 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { 196 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
198 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], 197 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i],
199 samples[i]); 198 samples[i]);
200 } 199 }
201 } 200 }
202 201
203 start = pa_rtclock_now(); 202 start = pa_rtclock_now();
204 for (i = 0; i < TIMES; i++) { 203 for (i = 0; i < TIMES; i++) {
205 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); 204 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN);
206 } 205 }
207 stop = pa_rtclock_now(); 206 stop = pa_rtclock_now();
208 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); 207 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
209 208
210 start = pa_rtclock_now(); 209 start = pa_rtclock_now();
211 for (i = 0; i < TIMES; i++) { 210 for (i = 0; i < TIMES; i++) {
212 func(SAMPLES, samples, floats_ref); 211 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN);
213 } 212 }
214 stop = pa_rtclock_now(); 213 stop = pa_rtclock_now();
215 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); 214 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
216 } 215 }
217 216

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.