comparison sconv_neon.c @ 0:e0040ee59c3c

import
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Thu, 12 Jan 2012 17:27:46 +0100
parents
children b829afbea564
comparison
equal deleted inserted replaced
-1:000000000000 0:e0040ee59c3c
1 /*
2 * Copyright 2012 Peter Meerwald <p.meerwald@bct-electronic.com>
3 */
4
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <stdarg.h>
8 #include <string.h>
9 #include <math.h>
10 #include <sys/time.h>
11 #include <assert.h>
12
/* Unsigned 64-bit-wide integer used for the microsecond timestamps in this file. */
typedef unsigned long long pa_usec_t;

/* Signed 16-bit sample type. */
typedef short int16_t;

/* Pointer to a conversion routine taking a count and two untyped buffers. */
typedef void (*pa_convert_func_t)(unsigned n, const void *a, void *b);

/* pa_assert() is a plain alias for the C library assert(). */
#define pa_assert(x) assert(x)

/*
 * Clamp x to the closed range [low, high].
 *
 * This is a macro, so each argument may be evaluated more than once; do not
 * pass expressions with side effects.
 */
#define PA_CLAMP_UNLIKELY(x, low, high)          \
    (((x) < (low))                               \
         ? (low)                                 \
         : (((x) > (high)) ? (high) : (x)))
21
/*
 * Print a printf-style formatted message to stdout, followed by a newline.
 *
 * The message is formatted directly onto the stream instead of being staged
 * in a fixed-size stack buffer, so a long message can neither overflow a
 * buffer nor be truncated.
 */
static void pa_log_info(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}
30
31 #define pa_log_debug pa_log_info
32
33 static pa_usec_t pa_rtclock_now() {
34 struct timeval tv;
35 gettimeofday(&tv, NULL);
36
37 return tv.tv_sec * 1000000ULL + tv.tv_usec;
38 }
39
40 #if defined(__arm__)
41
42 #include "arm_neon.h"
43
44 void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {
45 pa_assert(a);
46 pa_assert(b);
47
48 for (; n > 0; n--) {
49 float v = *(a++);
50
51 v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f);
52 *(b++) = (int16_t) lrintf(v * 0x7FFF);
53 }
54 }
55
56 void pa_sconv_s16le_from_f32ne_neon(unsigned n, const float *a, int16_t *b) {
57 unsigned i;
58
59 const float32x4_t plusone4 = vdupq_n_f32(1.0f);
60 const float32x4_t minusone4 = vdupq_n_f32(-1.0f);
61 const float32x4_t half4 = vdupq_n_f32(0.5f);
62 const float32x4_t scale4 = vdupq_n_f32(32767.0f);
63 const uint32x4_t mask4 = vdupq_n_u32(0x80000000);
64
65 for (i = 0; i < n/4; i++) {
66 float32x4_t v4 = ((float32x4_t *)a)[i];
67 v4 = vmulq_f32(vmaxq_f32(vminq_f32(v4, plusone4) , minusone4), scale4);
68
69 const float32x4_t w4 = vreinterpretq_f32_u32(vorrq_u32(vandq_u32(
70 vreinterpretq_u32_f32(v4), mask4), vreinterpretq_u32_f32(half4)));
71
72 ((int16x4_t *)b)[i] = vmovn_s32(vcvtq_s32_f32(vaddq_f32(v4, w4)));
73 }
74
75 // leftovers
76 for (i = n & ~3; i < n; i++) {
77 b[i] = (int16_t) lrintf(PA_CLAMP_UNLIKELY(a[i], -1.0f, 1.0f) * 0x7FFF);
78 }
79 }
80
81 void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) {
82 pa_assert(a);
83 pa_assert(b);
84
85 for (; n > 0; n--)
86 *(b++) = ((float) (*(a++)))/(float) 0x7FFF;
87 }
88
89 void pa_sconv_s16le_to_f32ne_neon(unsigned n, const int16_t *a, float *b) {
90 unsigned i;
91
92 const float32x4_t invscale4 = vdupq_n_f32(1.0f / 0x7FFF);
93
94 for (i = 0; i < n/4; i++) {
95 ((float32x4_t *)b)[i] = vmulq_f32(vcvtq_f32_s32(vmovl_s16(((int16x4_t *)a)[i])), invscale4);
96 }
97
98 // leftovers
99 const float invscale = 1.0f / 0x7FFF;
100 for (i = n & ~3; i < n; i++) {
101 b[i] = a[i] * invscale;
102 }
103 }
104
105 #define SAMPLES 1019
106 #define TIMES 300
107
108 static void run_test_from(void) {
109 int16_t samples[SAMPLES];
110 int16_t samples_ref[SAMPLES];
111 float floats[SAMPLES];
112 int i;
113 pa_usec_t start, stop;
114 pa_convert_func_t func;
115
116 pa_log_debug("checking NEON sconv_s16le_from_float(%d)", SAMPLES);
117
118 memset(samples_ref, 0, sizeof(samples_ref));
119 memset(samples, 0, sizeof(samples));
120
121 for (i = 0; i < SAMPLES; i++) {
122 floats[i] = 2.1f * (rand()/(float) RAND_MAX - 0.5f);
123 }
124
125 func = (pa_convert_func_t) pa_sconv_s16le_from_float32ne;
126 func(SAMPLES, floats, samples_ref);
127 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples);
128
129 for (i = 0; i < SAMPLES; i++) {
130 if (abs(samples[i] - samples_ref[i]) > 0) {
131 pa_log_debug("%d: %d != %d (%f)", i, samples[i], samples_ref[i],
132 floats[i]);
133 }
134 }
135
136 start = pa_rtclock_now();
137 for (i = 0; i < TIMES; i++) {
138 pa_sconv_s16le_from_f32ne_neon(SAMPLES, floats, samples);
139 }
140 stop = pa_rtclock_now();
141 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
142
143 start = pa_rtclock_now();
144 for (i = 0; i < TIMES; i++) {
145 func(SAMPLES, floats, samples_ref);
146 }
147 stop = pa_rtclock_now();
148 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
149 }
150
151 static void run_test_to(void) {
152 int16_t samples[SAMPLES];
153 float floats[SAMPLES];
154 float floats_ref[SAMPLES];
155 int i;
156 pa_usec_t start, stop;
157 pa_convert_func_t func;
158
159 pa_log_debug("checking NEON sconv_s16le_to_float(%d)", SAMPLES);
160
161 memset(floats_ref, 0, sizeof(floats_ref));
162 memset(floats, 0, sizeof(float));
163
164 for (i = 0; i < SAMPLES; i++) {
165 samples[i] = rand() - RAND_MAX/2;
166 }
167
168 func = (pa_convert_func_t) pa_sconv_s16le_to_float32ne;
169 func(SAMPLES, samples, floats_ref);
170 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats);
171
172 for (i = 0; i < SAMPLES; i++) {
173 if (fabsf(floats[i] - floats_ref[i]) > 0.00001) {
174 pa_log_debug("%d: %.8f != %.8f (%d)", i, floats[i], floats_ref[i],
175 samples[i]);
176 }
177 }
178
179 start = pa_rtclock_now();
180 for (i = 0; i < TIMES; i++) {
181 pa_sconv_s16le_to_f32ne_neon(SAMPLES, samples, floats);
182 }
183 stop = pa_rtclock_now();
184 pa_log_info("NEON: %llu usec.", (long long unsigned int)(stop - start));
185
186 start = pa_rtclock_now();
187 for (i = 0; i < TIMES; i++) {
188 func(SAMPLES, samples, floats_ref);
189 }
190 stop = pa_rtclock_now();
191 pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
192 }
193
194 #endif /* defined(__arm__) */
195
/*
 * Program entry point: run both conversion tests.
 *
 * The test routines are only compiled when __arm__ is defined, so the calls
 * are guarded the same way; on any other target the program just reports
 * that the tests were skipped.
 */
int main(void) {
#if defined(__arm__)
    run_test_from();
    run_test_to();
#else
    puts("NEON tests skipped: not built for an ARM target");
#endif

    return EXIT_SUCCESS;
}

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.