Mercurial > hg > audiostuff
comparison spandsp-0.0.3/spandsp-0.0.3/src/vector_int.c @ 5:f762bf195c4b
import spandsp-0.0.3
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 16:00:21 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:26cd8f1ef0b1 | 5:f762bf195c4b |
---|---|
1 /* | |
2 * SpanDSP - a series of DSP components for telephony | |
3 * | |
4 * vector_int.c - Integer vector arithmetic | |
5 * | |
6 * Written by Steve Underwood <steveu@coppice.org> | |
7 * | |
8 * Copyright (C) 2006 Steve Underwood | |
9 * | |
10 * All rights reserved. | |
11 * | |
12 * This program is free software; you can redistribute it and/or modify | |
13 * it under the terms of the GNU General Public License version 2, as | |
14 * published by the Free Software Foundation. | |
15 * | |
16 * This program is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 * GNU General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU General Public License | |
22 * along with this program; if not, write to the Free Software | |
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24 * | |
25 * $Id: vector_int.c,v 1.5 2006/11/19 14:07:26 steveu Exp $ | |
26 */ | |
27 | |
28 /*! \file */ | |
29 | |
30 #ifdef HAVE_CONFIG_H | |
31 #include <config.h> | |
32 #endif | |
33 | |
34 #include <inttypes.h> | |
35 #include <stdlib.h> | |
36 #include <stdio.h> | |
37 #include <string.h> | |
38 #if defined(HAVE_TGMATH_H) | |
39 #include <tgmath.h> | |
40 #endif | |
41 #if defined(HAVE_MATH_H) | |
42 #include <math.h> | |
43 #endif | |
44 #include <assert.h> | |
45 | |
46 #include "spandsp/telephony.h" | |
47 #include "spandsp/logging.h" | |
48 #include "spandsp/vector_int.h" | |
49 | |
50 int32_t vec_dot_prodi16(const int16_t x[], const int16_t y[], int n) | |
51 { | |
52 int32_t z; | |
53 | |
54 #if defined(__GNUC__) && defined(__i386__) | |
55 __asm__ __volatile__( | |
56 " emms;\n" | |
57 " pxor %%mm0,%%mm0;\n" | |
58 " leal -32(%%esi,%%eax,2),%%edx;\n" /* edx = top - 32 */ | |
59 | |
60 " cmpl %%edx,%%esi;\n" | |
61 " ja 1f;\n" | |
62 | |
63 /* Work in blocks of 16 int16_t's until we are near the end */ | |
64 " .p2align 2;\n" | |
65 "2:\n" | |
66 " movq (%%edi),%%mm1;\n" | |
67 " movq (%%esi),%%mm2;\n" | |
68 " pmaddwd %%mm2,%%mm1;\n" | |
69 " paddd %%mm1,%%mm0;\n" | |
70 " movq 8(%%edi),%%mm1;\n" | |
71 " movq 8(%%esi),%%mm2;\n" | |
72 " pmaddwd %%mm2,%%mm1;\n" | |
73 " paddd %%mm1,%%mm0;\n" | |
74 " movq 16(%%edi),%%mm1;\n" | |
75 " movq 16(%%esi),%%mm2;\n" | |
76 " pmaddwd %%mm2,%%mm1;\n" | |
77 " paddd %%mm1,%%mm0;\n" | |
78 " movq 24(%%edi),%%mm1;\n" | |
79 " movq 24(%%esi),%%mm2;\n" | |
80 " pmaddwd %%mm2,%%mm1;\n" | |
81 " paddd %%mm1,%%mm0;\n" | |
82 | |
83 " addl $32,%%esi;\n" | |
84 " addl $32,%%edi;\n" | |
85 " cmpl %%edx,%%esi;\n" | |
86 " jbe 2b;\n" | |
87 | |
88 " .p2align 2;\n" | |
89 "1:\n" | |
90 " addl $24,%%edx;\n" /* now edx = top - 8 */ | |
91 " cmpl %%edx,%%esi;\n" | |
92 " ja 3f;\n" | |
93 | |
94 /* Work in blocks of 4 int16_t's until we are near the end */ | |
95 " .p2align 2;\n" | |
96 "4:\n" | |
97 " movq (%%edi),%%mm1;\n" | |
98 " movq (%%esi),%%mm2;\n" | |
99 " pmaddwd %%mm2,%%mm1;\n" | |
100 " paddd %%mm1,%%mm0;\n" | |
101 | |
102 " addl $8,%%esi;\n" | |
103 " addl $8,%%edi;\n" | |
104 " cmpl %%edx,%%esi;" | |
105 " jbe 4b;\n" | |
106 | |
107 " .p2align 2;\n" | |
108 "3:\n" | |
109 " addl $4,%%edx;\n" /* now edx = top - 4 */ | |
110 " cmpl %%edx,%%esi;\n" | |
111 " ja 5f;\n" | |
112 | |
113 /* Work in a block of 2 int16_t's */ | |
114 " movd (%%edi),%%mm1;\n" | |
115 " movd (%%esi),%%mm2;\n" | |
116 " pmaddwd %%mm2,%%mm1;\n" | |
117 " paddd %%mm1,%%mm0;\n" | |
118 | |
119 " addl $4,%%esi;\n" | |
120 " addl $4,%%edi;\n" | |
121 | |
122 " .p2align 2;\n" | |
123 "5:\n" | |
124 " addl $2,%%edx;\n" /* now edx = top - 2 */ | |
125 " cmpl %%edx,%%esi;\n" | |
126 " ja 6f;\n" | |
127 | |
128 /* Deal with the very last int16_t, when n is odd */ | |
129 " movswl (%%edi),%%eax;\n" | |
130 " andl $65535,%%eax;\n" | |
131 " movd %%eax,%%mm1;\n" | |
132 " movswl (%%esi),%%eax;\n" | |
133 " andl $65535,%%eax;\n" | |
134 " movd %%eax,%%mm2;\n" | |
135 " pmaddwd %%mm2,%%mm1;\n" | |
136 " paddd %%mm1,%%mm0;\n" | |
137 | |
138 " .p2align 2;\n" | |
139 "6:\n" | |
140 /* Merge the pieces of the answer */ | |
141 " movq %%mm0,%%mm1;\n" | |
142 " punpckhdq %%mm0,%%mm1;\n" | |
143 " paddd %%mm1,%%mm0;\n" | |
144 /* et voila, eax has the final result */ | |
145 " movd %%mm0,%%eax;\n" | |
146 | |
147 " emms;\n" | |
148 : "=a" (z) | |
149 : "S" (x), "D" (y), "a" (n) | |
150 : "cc" | |
151 ); | |
152 #else | |
153 int i; | |
154 | |
155 z = 0; | |
156 for (i = 0; i < n; i++) | |
157 z += x[i]*y[i]; | |
158 #endif | |
159 return z; | |
160 } | |
161 /*- End of function --------------------------------------------------------*/ | |
162 | |
163 int32_t vec_min_maxi16(const int16_t x[], int n, int16_t out[]) | |
164 { | |
165 #if defined(__GNUC__) && defined(__i386__) | |
166 static const int32_t lower_bound = 0x80008000; | |
167 static const int32_t upper_bound = 0x7FFF7FFF; | |
168 int32_t max; | |
169 | |
170 __asm__ __volatile__( | |
171 " emms;\n" | |
172 " pushl %%edx;\n" | |
173 " leal -8(%%esi,%%eax,2),%%edx;\n" | |
174 | |
175 " cmpl %%edx,%%esi;\n" | |
176 " jbe 2f;\n" | |
177 " movd %[lower],%%mm0;\n" | |
178 " movd %[upper],%%mm1;\n" | |
179 " jmp 1f;\n" | |
180 | |
181 " .p2align 2;\n" | |
182 "2:\n" | |
183 " movq (%%esi),%%mm0;\n" /* mm0 will be max's */ | |
184 " movq %%mm0,%%mm1;\n" /* mm1 will be min's */ | |
185 " addl $8,%%esi;\n" | |
186 " cmpl %%edx,%%esi;\n" | |
187 " ja 4f;\n" | |
188 | |
189 " .p2align 2;\n" | |
190 "3:\n" | |
191 " movq (%%esi),%%mm2;\n" | |
192 | |
193 " movq %%mm2,%%mm3;\n" | |
194 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ | |
195 " movq %%mm3,%%mm4;\n" | |
196 " pand %%mm2,%%mm3;\n" /* mm3 is mm2 masked to new max's */ | |
197 " pandn %%mm0,%%mm4;\n" /* mm4 is mm0 masked to its max's */ | |
198 " por %%mm3,%%mm4;\n" | |
199 " movq %%mm4,%%mm0;\n" /* Now mm0 is updated max's */ | |
200 | |
201 " movq %%mm1,%%mm3;\n" | |
202 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ | |
203 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ | |
204 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ | |
205 " por %%mm3,%%mm2;\n" | |
206 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ | |
207 | |
208 " addl $8,%%esi;\n" | |
209 " cmpl %%edx,%%esi;\n" | |
210 " jbe 3b;\n" | |
211 | |
212 " .p2align 2;\n" | |
213 "4:\n" | |
214 /* Merge down the 4-word max/mins to lower 2 words */ | |
215 " movq %%mm0,%%mm2;\n" | |
216 " psrlq $32,%%mm2;\n" | |
217 " movq %%mm2,%%mm3;\n" | |
218 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ | |
219 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new max's */ | |
220 " pandn %%mm0,%%mm3;\n" /* mm3 is mm0 masked to its max's */ | |
221 " por %%mm3,%%mm2;\n" | |
222 " movq %%mm2,%%mm0;\n" /* now mm0 is updated max's */ | |
223 | |
224 " movq %%mm1,%%mm2;\n" | |
225 " psrlq $32,%%mm2;\n" | |
226 " movq %%mm1,%%mm3;\n" | |
227 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ | |
228 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ | |
229 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ | |
230 " por %%mm3,%%mm2;\n" | |
231 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ | |
232 | |
233 " .p2align 2;\n" | |
234 "1:\n" | |
235 " addl $4,%%edx;\n" /* now dx = top-4 */ | |
236 " cmpl %%edx,%%esi;\n" | |
237 " ja 5f;\n" | |
238 /* Here, there are >= 2 words of input remaining */ | |
239 " movd (%%esi),%%mm2;\n" | |
240 | |
241 " movq %%mm2,%%mm3;\n" | |
242 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ | |
243 " movq %%mm3,%%mm4;\n" | |
244 " pand %%mm2,%%mm3;\n" /* mm3 is mm2 masked to new max's */ | |
245 " pandn %%mm0,%%mm4;\n" /* mm4 is mm0 masked to its max's */ | |
246 " por %%mm3,%%mm4;\n" | |
247 " movq %%mm4,%%mm0;\n" /* now mm0 is updated max's */ | |
248 | |
249 " movq %%mm1,%%mm3;\n" | |
250 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ | |
251 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ | |
252 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ | |
253 " por %%mm3,%%mm2;\n" | |
254 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ | |
255 | |
256 " addl $4,%%esi;\n" | |
257 | |
258 " .p2align 2;\n" | |
259 "5:\n" | |
260 /* Merge down the 2-word max/mins to 1 word */ | |
261 " movq %%mm0,%%mm2;\n" | |
262 " psrlq $16,%%mm2;\n" | |
263 " movq %%mm2,%%mm3;\n" | |
264 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ | |
265 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new max's */ | |
266 " pandn %%mm0,%%mm3;\n" /* mm3 is mm0 masked to its max's */ | |
267 " por %%mm3,%%mm2;\n" | |
268 " movd %%mm2,%%ecx;\n" /* cx is max so far */ | |
269 | |
270 " movq %%mm1,%%mm2;\n" | |
271 " psrlq $16,%%mm2;\n" | |
272 " movq %%mm1,%%mm3;\n" | |
273 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ | |
274 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ | |
275 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ | |
276 " por %%mm3,%%mm2;\n" | |
277 " movd %%mm2,%%eax;\n" /* ax is min so far */ | |
278 | |
279 " addl $2,%%edx;\n" /* now dx = top-2 */ | |
280 " cmpl %%edx,%%esi;\n" | |
281 " ja 6f;\n" | |
282 | |
283 /* Here, there is one word of input left */ | |
284 " cmpw (%%esi),%%cx;\n" | |
285 " jge 9f;\n" | |
286 " movw (%%esi),%%cx;\n" | |
287 " .p2align 2;\n" | |
288 "9:\n" | |
289 " cmpw (%%esi),%%ax;\n" | |
290 " jle 6f;\n" | |
291 " movw (%%esi),%%ax;\n" | |
292 | |
293 " .p2align 2;\n" | |
294 "6:\n" | |
295 /* (finally!) cx is the max, ax the min */ | |
296 " movswl %%cx,%%ecx;\n" | |
297 " movswl %%ax,%%eax;\n" | |
298 | |
299 " popl %%edx;\n" /* ptr to output max,min vals */ | |
300 " andl %%edx,%%edx;\n" | |
301 " jz 7f;\n" | |
302 " movw %%cx,(%%edx);\n" /* max */ | |
303 " movw %%ax,2(%%edx);\n" /* min */ | |
304 " .p2align 2;\n" | |
305 "7:\n" | |
306 /* Now calculate max absolute value */ | |
307 " negl %%eax;\n" | |
308 " cmpl %%ecx,%%eax;\n" | |
309 " jge 8f;\n" | |
310 " movl %%ecx,%%eax;\n" | |
311 " .p2align 2;\n" | |
312 "8:\n" | |
313 " emms;\n" | |
314 : "=a" (max) | |
315 : "S" (x), "a" (n), "d" (out), [lower] "m" (lower_bound), [upper] "m" (upper_bound) | |
316 : "ecx" | |
317 ); | |
318 return max; | |
319 #else | |
320 int i; | |
321 int16_t min; | |
322 int16_t max; | |
323 int16_t temp; | |
324 int32_t z; | |
325 | |
326 max = INT16_MIN; | |
327 min = INT16_MAX; | |
328 for (i = 0; i < n; i++) | |
329 { | |
330 temp = x[i]; | |
331 if (temp > max) | |
332 max = temp; | |
333 /*endif*/ | |
334 if (temp < min) | |
335 min = temp; | |
336 /*endif*/ | |
337 } | |
338 /*endfor*/ | |
339 out[0] = max; | |
340 out[1] = min; | |
341 z = abs(min); | |
342 if (z > max) | |
343 return z; | |
344 return max; | |
345 #endif | |
346 } | |
347 /*- End of function --------------------------------------------------------*/ | |
348 /*- End of file ------------------------------------------------------------*/ |