Mercurial > hg > pa-neon
annotate sconv_neon.c @ 5:07763f536182 default tip
ALIGNment support
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Sun, 08 Jul 2012 21:48:08 +0200 |
parents | e889fd0e7769 |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> | |
3 */ | |
4 | |
5 #include <stdlib.h> | |
6 #include <stdio.h> | |
7 #include <stdarg.h> | |
8 #include <string.h> | |
9 #include <math.h> | |
10 #include <sys/time.h> | |
11 #include <assert.h> | |
12 | |
/*
 * 16-bit signed sample type.
 * NOTE(review): this reuses the <stdint.h> name; presumably harmless only
 * where the platform's own int16_t is also `short` - consider including
 * <stdint.h> instead.
 */
typedef short int16_t;

/* Signature of a conversion routine taking a count, a source and a destination. */
typedef void (*pa_convert_func_t)(unsigned n, const void *a, void *b);

/* Time value in microseconds (see pa_rtclock_now()). */
typedef unsigned long long pa_usec_t;

/* Assertions map straight onto the C library's assert(). */
#define pa_assert(x) assert(x)

/*
 * Clamp x to the closed range [low, high].
 * The arguments are expanded more than once, so they must not have side
 * effects.
 */
#define PA_CLAMP_UNLIKELY(x, low, high)         \
    (((x) < (low))                              \
        ? (low)                                 \
        : (((x) > (high)) ? (high) : (x)))
21 | |
/*
 * printf-style logging helper: formats the message and writes it to stdout,
 * followed by a newline.
 *
 * The message is streamed directly with vprintf() instead of being staged in
 * a fixed-size stack buffer, so an arbitrarily long message cannot overflow
 * anything.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}

/* Debug messages use the same sink as info messages. */
#define pa_log_debug pa_log_info
32 | |
33 static pa_usec_t pa_rtclock_now() { | |
34 struct timeval tv; | |
35 gettimeofday(&tv, NULL); | |
36 | |
37 return tv.tv_sec * 1000000ULL + tv.tv_usec; | |
38 } | |
39 | |
40 #if defined(__arm__) | |
41 | |
42 #include "arm_neon.h" | |
43 | |
/*
 * Scalar reference conversion: float -> signed 16-bit.
 *
 * Each of the n input samples is clamped to [-1.0, 1.0], scaled by 0x7FFF and
 * rounded with lrintf() before being stored to dst.
 */
void pa_sconv_s16le_from_float32ne(unsigned n, const float *src, int16_t *dst) {
    unsigned k;

    pa_assert(src);
    pa_assert(dst);

    for (k = 0; k < n; k++) {
        const float clamped = PA_CLAMP_UNLIKELY(src[k], -1.0f, 1.0f);

        dst[k] = (int16_t) lrintf(clamped * 0x7FFF);
    }
}
55 | |
/*
 * NEON conversion: float -> signed 16-bit.
 *
 * Processes the input four samples at a time in inline assembly and finishes
 * the remaining n % 4 samples with a scalar loop.
 *
 * The vector path clamps to [-1.0, 1.0], scales by 32767.0 and rounds by
 * adding 0.5 carrying the sign of the sample before the float -> int
 * conversion.  The scalar leftover path uses lrintf(), so results on exact
 * ties may differ between the two paths.
 */
void pa_sconv_s16le_from_f32ne_neon(unsigned n, const float *src, int16_t *dst) {
    unsigned leftover = n & 3;  /* samples handled by the scalar tail */
    unsigned quads = n >> 2;    /* full groups of four for the NEON loop */

    pa_assert(src);
    pa_assert(dst);

    /*
     * The assembly loop only tests its counter at the bottom, i.e. it always
     * runs at least once.  Skip it entirely when there is no complete group
     * of four, otherwise it would load and store four samples beyond what
     * the caller provided.
     */
    if (quads > 0) {
        __asm__ __volatile__ (
            "vdup.f32       q2, %[plusone]          \n\t" /* q2 = +1.0 */
            "vneg.f32       q3, q2                  \n\t" /* q3 = -1.0 */
            "vdup.f32       q4, %[scale]            \n\t" /* q4 = 32767.0 */
            "vdup.u32       q5, %[mask]             \n\t" /* q5 = sign-bit mask */
            "vdup.f32       q6, %[half]             \n\t" /* q6 = 0.5 */
            "1:                                     \n\t"
            "vld1.32        {q0}, [%[src]]!         \n\t"
            "vmin.f32       q0, q0, q2              \n\t" /* clamp */
            "vmax.f32       q0, q0, q3              \n\t"
            "vmul.f32       q0, q0, q4              \n\t" /* scale */
            "vand.u32       q1, q0, q5              \n\t"
            "vorr.u32       q1, q1, q6              \n\t" /* round: q1 = +/-0.5 */
            "vadd.f32       q0, q0, q1              \n\t"
            "vcvt.s32.f32   q0, q0                  \n\t" /* narrow */
            "vmovn.i32      d0, q0                  \n\t"
            "subs           %[n], %[n], #1          \n\t"
            "vst1.16        {d0}, [%[dst]]!         \n\t"
            "bgt            1b                      \n\t"
            /* output operands (or input operands that get modified) */
            : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (quads)
            /* input operands */
            : [plusone] "r" (1.0f), [scale] "r" (32767.0f),
              [half] "r" (0.5f), [mask] "r" (0x80000000)
            /* clobber list */
            : "memory", "cc", "q0", "q1", "q2", "q3", "q4", "q5", "q6"
        );
    }

    /* leftovers: src/dst were advanced past the vectorised part by the asm */
    while (leftover--) {
        *dst++ = (int16_t) lrintf(PA_CLAMP_UNLIKELY(*src, -1.0f, 1.0f) * 0x7FFF);
        src++;
    }
}
92 | |
/*
 * Scalar reference conversion: signed 16-bit -> float.
 *
 * Each of the n input samples is converted to float and divided by 0x7FFF.
 */
void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *src, float *dst) {
    unsigned k;

    pa_assert(src);
    pa_assert(dst);

    for (k = 0; k < n; k++)
        dst[k] = (float) src[k] / (float) 0x7FFF;
}
100 | |
/*
 * NEON conversion: signed 16-bit -> float.
 *
 * Processes the input four samples at a time in inline assembly and finishes
 * the remaining n % 4 samples with a scalar loop.  Both paths multiply by
 * 1.0f / 0x7FFF (the scalar reference divides by 0x7FFF instead).
 */
void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *src, float *dst) {
    unsigned leftover = n & 3;  /* samples handled by the scalar tail */
    unsigned quads = n >> 2;    /* full groups of four for the NEON loop */

    const float invscale = 1.0f / 0x7FFF;

    pa_assert(src);
    pa_assert(dst);

    /*
     * The assembly loop only tests its counter at the bottom, i.e. it always
     * runs at least once.  Skip it entirely when there is no complete group
     * of four, otherwise it would load and store four samples beyond what
     * the caller provided.
     */
    if (quads > 0) {
        __asm__ __volatile__ (
            "vdup.f32       q1, %[invscale]         \n\t" /* q1 = 1/32767 */
            "1:                                     \n\t"
            "vld1.16        {d0}, [%[src]]!         \n\t"
            "vmovl.s16      q0, d0                  \n\t" /* widen to 32 bit */

            "vcvt.f32.s32   q0, q0                  \n\t" /* int -> float */
            "vmul.f32       q0, q0, q1              \n\t" /* scale */

            "subs           %[n], %[n], #1          \n\t"
            "vst1.32        {q0}, [%[dst]]!         \n\t"
            "bgt            1b                      \n\t"
            /* output operands (or input operands that get modified) */
            : [dst] "+r" (dst), [src] "+r" (src), [n] "+r" (quads)
            /* input operands */
            : [invscale] "r" (invscale)
            /* clobber list */
            : "memory", "cc", "q0", "q1"
        );
    }

    /* leftovers: src/dst were advanced past the vectorised part by the asm */
    while (leftover--) {
        *dst++ = *src++ * invscale;
    }
}
130 | |
/*
 * Number of samples converted per call.  1019 is not a multiple of four, so
 * the scalar leftover loops of the NEON routines run as well.
 */
#define SAMPLES 1019

/* Number of repetitions of each conversion in the timing loops. */
#define TIMES 100000

/* Element offset added to every buffer pointer handed to the converters. */
#define ALIGN 1
0 | 134 |
135 static void run_test_from(void) { | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
136 int16_t samples[SAMPLES+ALIGN]; |
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
137 int16_t samples_ref[SAMPLES+ALIGN]; |
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
138 float floats[SAMPLES+ALIGN]; |
0 | 139 int i; |
140 pa_usec_t start, stop; | |
141 | |
142 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); | |
143 | |
144 memset(samples_ref, 0, sizeof(samples_ref)); | |
145 memset(samples, 0, sizeof(samples)); | |
146 | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
147 for (i = 0; i < SAMPLES+ALIGN; i++) { |
0 | 148 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); |
149 } | |
150 | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
151 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
152 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
0 | 153 |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
154 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
0 | 155 if (abs(samples[i] - samples_ref[i]) > 0) { |
156 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], | |
157 floats[i]); | |
158 } | |
159 } | |
160 | |
161 start = pa_rtclock_now(); | |
162 for (i = 0; i < TIMES; i++) { | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
163 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats+ALIGN, samples+ALIGN); |
0 | 164 } |
165 stop = pa_rtclock_now(); | |
166 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
167 | |
168 start = pa_rtclock_now(); | |
169 for (i = 0; i < TIMES; i++) { | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
170 pa_sconv_s16le_from_float32ne(SAMPLES, floats+ALIGN, samples_ref+ALIGN); |
0 | 171 } |
172 stop = pa_rtclock_now(); | |
173 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
174 } | |
175 | |
176 static void run_test_to(void) { | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
177 int16_t samples[SAMPLES+ALIGN]; |
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
178 float floats[SAMPLES+ALIGN]; |
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
179 float floats_ref[SAMPLES+ALIGN]; |
0 | 180 int i; |
181 pa_usec_t start, stop; | |
182 | |
183 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); | |
184 | |
185 memset(floats_ref, 0, sizeof(floats_ref)); | |
186 memset(floats, 0, sizeof(float)); | |
187 | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
188 for (i = 0; i < SAMPLES+ALIGN; i++) { |
0 | 189 samples[i] = rand() - RAND_MAX/2; |
190 } | |
191 | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
192 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
193 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
0 | 194 |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
195 for (i = ALIGN; i < SAMPLES+ALIGN; i++) { |
0 | 196 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { |
197 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], | |
198 samples[i]); | |
199 } | |
200 } | |
201 | |
202 start = pa_rtclock_now(); | |
203 for (i = 0; i < TIMES; i++) { | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
204 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples+ALIGN, floats+ALIGN); |
0 | 205 } |
206 stop = pa_rtclock_now(); | |
207 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
208 | |
209 start = pa_rtclock_now(); | |
210 for (i = 0; i < TIMES; i++) { | |
5
07763f536182
ALIGNment support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
3
diff
changeset
|
211 pa_sconv_s16le_to_float32ne(SAMPLES, samples+ALIGN, floats_ref+ALIGN); |
0 | 212 } |
213 stop = pa_rtclock_now(); | |
214 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
215 } | |
216 | |
217 #endif /* defined(__arm__) */ | |
218 | |
/*
 * Runs both conversion tests.
 *
 * The test functions are only compiled when __arm__ is defined, so the calls
 * are guarded the same way; other targets just report that nothing was run.
 */
int main(void) {
#if defined(__arm__)
    run_test_from();
    run_test_to();
#else
    pa_log_info("NEON tests skipped: not built for ARM");
#endif

    return EXIT_SUCCESS;
}