Mercurial > hg > pa-neon
comparison sconv_neon.c @ 0:e0040ee59c3c
import
author | Peter Meerwald <p.meerwald@bct-electronic.com> |
---|---|
date | Thu, 12 Jan 2012 17:27:46 +0100 |
parents | |
children | b829afbea564 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0040ee59c3c |
---|---|
1 /* | |
2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com> | |
3 */ | |
4 | |
5 #include <stdlib.h> | |
6 #include <stdio.h> | |
7 #include <stdarg.h> | |
8 #include <string.h> | |
9 #include <math.h> | |
10 #include <sys/time.h> | |
11 #include <assert.h> | |
12 | |
/* Local definitions of the types and helper macros used by the converters
 * and the test harness below. */

/* 16-bit signed sample type. */
typedef signed short int int16_t;

/* Type-erased converter signature: n samples from a to b. */
typedef void (*pa_convert_func_t)(unsigned n, const void *a, void *b);

/* Timestamp in microseconds. */
typedef unsigned long long pa_usec_t;

/* pa_assert() maps directly onto the standard assert(). */
#define pa_assert(x) assert(x)

/* Limit x to the range [low, high].
 * The arguments may be evaluated more than once, so they must not have
 * side effects. */
#define PA_CLAMP_UNLIKELY(x, low, high)                         \
    (((x) < (low))                                              \
        ? (low)                                                 \
        : (((x) > (high)) ? (high) : (x)))
21 | |
/*
 * Print a printf-style formatted message, followed by a newline, on stdout.
 *
 * format: printf-style format string; the variadic arguments must match it.
 *
 * The message is formatted directly to stdout with vprintf(), so its length
 * is not limited by an intermediate fixed-size buffer.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}

/* Debug messages go to the same sink as info messages. */
#define pa_log_debug pa_log_info
32 | |
33 static pa_usec_t pa_rtclock_now() { | |
34 struct timeval tv; | |
35 gettimeofday(&tv, NULL); | |
36 | |
37 return tv.tv_sec * 1000000ULL + tv.tv_usec; | |
38 } | |
39 | |
40 #if defined(__arm__) | |
41 | |
42 #include "arm_neon.h" | |
43 | |
44 void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) { | |
45 pa_assert(a); | |
46 pa_assert(b); | |
47 | |
48 for (; n > 0; n--) { | |
49 float v = *(a++); | |
50 | |
51 v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f); | |
52 *(b++) = (int16_t) lrintf(v * 0x7FFF); | |
53 } | |
54 } | |
55 | |
56 void pa_sconv_s16le_from_f32ne_neon(unsigned n, const float *a, int16_t *b) { | |
57 unsigned i; | |
58 | |
59 const float32x4_t plusone4 = vdupq_n_f32(1.0f); | |
60 const float32x4_t minusone4 = vdupq_n_f32(-1.0f); | |
61 const float32x4_t half4 = vdupq_n_f32(0.5f); | |
62 const float32x4_t scale4 = vdupq_n_f32(32767.0f); | |
63 const uint32x4_t mask4 = vdupq_n_u32(0x80000000); | |
64 | |
65 for (i = 0; i < n/4; i++) { | |
66 float32x4_t v4 = ((float32x4_t *)a)[i]; | |
67 v4 = vmulq_f32(vmaxq_f32(vminq_f32(v4, plusone4) , minusone4), scale4); | |
68 | |
69 const float32x4_t w4 = vreinterpretq_f32_u32(vorrq_u32(vandq_u32( | |
70 vreinterpretq_u32_f32(v4), mask4), vreinterpretq_u32_f32(half4))); | |
71 | |
72 ((int16x4_t *)b)[i] = vmovn_s32(vcvtq_s32_f32(vaddq_f32(v4, w4))); | |
73 } | |
74 | |
75 // leftovers | |
76 for (i = n & ~3; i < n; i++) { | |
77 b[i] = (int16_t) lrintf(PA_CLAMP_UNLIKELY(a[i], -1.0f, 1.0f) * 0x7FFF); | |
78 } | |
79 } | |
80 | |
81 void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) { | |
82 pa_assert(a); | |
83 pa_assert(b); | |
84 | |
85 for (; n > 0; n--) | |
86 *(b++) = ((float) (*(a++)))/(float) 0x7FFF; | |
87 } | |
88 | |
89 void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *a, float *b) { | |
90 unsigned i; | |
91 | |
92 const float32x4_t invscale4 = vdupq_n_f32(1.0f / 0x7FFF); | |
93 | |
94 for (i = 0; i < n/4; i++) { | |
95 ((float32x4_t *)b)[i] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(((int16x4_t *)a)[i])), invscale4); | |
96 } | |
97 | |
98 // leftovers | |
99 const float invscale = 1.0f / 0x7FFF; | |
100 for (i = n & ~3; i < n; i++) { | |
101 b[i] = a[i] * invscale; | |
102 } | |
103 } | |
104 | |
105 #define SAMPLES 1019 | |
106 #define TIMES 300 | |
107 | |
108 static void run_test_from(void) { | |
109 int16_t samples[SAMPLES]; | |
110 int16_t samples_ref[SAMPLES]; | |
111 float floats[SAMPLES]; | |
112 int i; | |
113 pa_usec_t start, stop; | |
114 pa_convert_func_t func; | |
115 | |
116 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES); | |
117 | |
118 memset(samples_ref, 0, sizeof(samples_ref)); | |
119 memset(samples, 0, sizeof(samples)); | |
120 | |
121 for (i = 0; i < SAMPLES; i++) { | |
122 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f); | |
123 } | |
124 | |
125 func = (pa_convert_func_t) pa_sconv_s16le_from_float32ne; | |
126 func(SAMPLES, floats, samples_ref); | |
127 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | |
128 | |
129 for (i = 0; i < SAMPLES; i++) { | |
130 if (abs(samples[i] - samples_ref[i]) > 0) { | |
131 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i], | |
132 floats[i]); | |
133 } | |
134 } | |
135 | |
136 start = pa_rtclock_now(); | |
137 for (i = 0; i < TIMES; i++) { | |
138 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples); | |
139 } | |
140 stop = pa_rtclock_now(); | |
141 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
142 | |
143 start = pa_rtclock_now(); | |
144 for (i = 0; i < TIMES; i++) { | |
145 func(SAMPLES, floats, samples_ref); | |
146 } | |
147 stop = pa_rtclock_now(); | |
148 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
149 } | |
150 | |
151 static void run_test_to(void) { | |
152 int16_t samples[SAMPLES]; | |
153 float floats[SAMPLES]; | |
154 float floats_ref[SAMPLES]; | |
155 int i; | |
156 pa_usec_t start, stop; | |
157 pa_convert_func_t func; | |
158 | |
159 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES); | |
160 | |
161 memset(floats_ref, 0, sizeof(floats_ref)); | |
162 memset(floats, 0, sizeof(float)); | |
163 | |
164 for (i = 0; i < SAMPLES; i++) { | |
165 samples[i] = rand() - RAND_MAX/2; | |
166 } | |
167 | |
168 func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne; | |
169 func(SAMPLES, samples, floats_ref); | |
170 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | |
171 | |
172 for (i = 0; i < SAMPLES; i++) { | |
173 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) { | |
174 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i], | |
175 samples[i]); | |
176 } | |
177 } | |
178 | |
179 start = pa_rtclock_now(); | |
180 for (i = 0; i < TIMES; i++) { | |
181 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats); | |
182 } | |
183 stop = pa_rtclock_now(); | |
184 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start)); | |
185 | |
186 start = pa_rtclock_now(); | |
187 for (i = 0; i < TIMES; i++) { | |
188 func(SAMPLES, samples, floats_ref); | |
189 } | |
190 stop = pa_rtclock_now(); | |
191 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); | |
192 } | |
193 | |
194 #endif /* defined(__arm__) */ | |
195 | |
/*
 * Program entry point: run both conversion checks and benchmarks.
 *
 * The converters and their tests are only compiled when __arm__ is defined,
 * so the calls are guarded the same way; on other targets a notice is
 * printed instead of failing to build.
 */
int main(void) {
#if defined(__arm__)
    run_test_from();
    run_test_to();
#else
    puts("NEON tests skipped: not built for ARM");
#endif

    return EXIT_SUCCESS;
}