comparison spandsp-0.0.6pre17/src/vector_int.c @ 4:26cd8f1ef0b1

import spandsp-0.0.6pre17
author Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date Fri, 25 Jun 2010 15:50:58 +0200
parents
children
comparison
equal deleted inserted replaced
3:c6c5a16ce2f2 4:26cd8f1ef0b1
1 /*
2 * SpanDSP - a series of DSP components for telephony
3 *
4 * vector_int.c - Integer vector arithmetic
5 *
6 * Written by Steve Underwood <steveu@coppice.org>
7 *
8 * Copyright (C) 2006 Steve Underwood
9 *
10 * All rights reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 2.1,
14 * as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * $Id: vector_int.c,v 1.26.4.1 2009/12/28 11:54:59 steveu Exp $
26 */
27
28 /*! \file */
29
30 #if defined(HAVE_CONFIG_H)
31 #include "config.h"
32 #endif
33
34 #include <inttypes.h>
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #if defined(HAVE_TGMATH_H)
39 #include <tgmath.h>
40 #endif
41 #if defined(HAVE_MATH_H)
42 #include <math.h>
43 #endif
44 #include <assert.h>
45
46 #include "floating_fudge.h"
47 #include "mmx_sse_decs.h"
48
49 #include "spandsp/telephony.h"
50 #include "spandsp/vector_int.h"
51
/*
 * Dot product of two vectors of int16_t.
 *
 * x, y  The two input vectors; n elements are read from each.
 * n     The number of elements to process.
 *
 * Returns the sum of x[i]*y[i] for i in [0, n), accumulated in 32 bits. The
 * accumulation wraps modulo 2^32 on overflow in every code path (the MMX
 * paths add with paddd; the portable path accumulates in an unsigned type),
 * so no path relies on signed overflow.
 *
 * When built with GCC, SPANDSP_USE_MMX and an x86 target, a hand written MMX
 * routine is used. It walks the vectors in blocks of 16, 4, 2 and finally 1
 * element. The asm advances the registers holding x and y, uses rdx/edx as an
 * end-of-data marker, and reads memory through the pointers, so all of these
 * are declared to the compiler in the operand and clobber lists.
 */
SPAN_DECLARE(int32_t) vec_dot_prodi16(const int16_t x[], const int16_t y[], int n)
{
    int32_t z;

#if defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)  &&  defined(__x86_64__)
    __asm__ __volatile__(
        " emms;\n"
        " pxor %%mm0,%%mm0;\n"
        " leaq -32(%%rsi,%%rax,2),%%rdx;\n"         /* rdx = top - 32 */

        " cmpq %%rdx,%%rsi;\n"
        " ja 1f;\n"

        /* Work in blocks of 16 int16_t's until we are near the end */
        " .p2align 2;\n"
        "2:\n"
        " movq (%%rdi),%%mm1;\n"
        " movq (%%rsi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        " movq 8(%%rdi),%%mm1;\n"
        " movq 8(%%rsi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        " movq 16(%%rdi),%%mm1;\n"
        " movq 16(%%rsi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        " movq 24(%%rdi),%%mm1;\n"
        " movq 24(%%rsi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " addq $32,%%rsi;\n"
        " addq $32,%%rdi;\n"
        " cmpq %%rdx,%%rsi;\n"
        " jbe 2b;\n"

        " .p2align 2;\n"
        "1:\n"
        " addq $24,%%rdx;\n"                        /* Now rdx = top - 8 */
        " cmpq %%rdx,%%rsi;\n"
        " ja 3f;\n"

        /* Work in blocks of 4 int16_t's until we are near the end */
        " .p2align 2;\n"
        "4:\n"
        " movq (%%rdi),%%mm1;\n"
        " movq (%%rsi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " addq $8,%%rsi;\n"
        " addq $8,%%rdi;\n"
        " cmpq %%rdx,%%rsi;\n"
        " jbe 4b;\n"

        " .p2align 2;\n"
        "3:\n"
        " addq $4,%%rdx;\n"                         /* Now rdx = top - 4 */
        " cmpq %%rdx,%%rsi;\n"
        " ja 5f;\n"

        /* Work in a block of 2 int16_t's */
        " movd (%%rdi),%%mm1;\n"
        " movd (%%rsi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " addq $4,%%rsi;\n"
        " addq $4,%%rdi;\n"

        " .p2align 2;\n"
        "5:\n"
        " addq $2,%%rdx;\n"                         /* Now rdx = top - 2 */
        " cmpq %%rdx,%%rsi;\n"
        " ja 6f;\n"

        /* Deal with the very last int16_t, when n is odd */
        " movswl (%%rdi),%%eax;\n"
        " andl $65535,%%eax;\n"
        " movd %%eax,%%mm1;\n"
        " movswl (%%rsi),%%eax;\n"
        " andl $65535,%%eax;\n"
        " movd %%eax,%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " .p2align 2;\n"
        "6:\n"
        /* Merge the pieces of the answer */
        " movq %%mm0,%%mm1;\n"
        " punpckhdq %%mm0,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        /* Et voila, eax has the final result */
        " movd %%mm0,%%eax;\n"

        " emms;\n"
        /* rsi and rdi are advanced by the code, so they must be read/write
           operands. n is widened, as the leaq uses all 64 bits of rax. */
        : "=a" (z), "+S" (x), "+D" (y)
        : "a" ((int64_t) n)
        : "rdx", "mm0", "mm1", "mm2", "cc", "memory"
    );
#elif defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)  &&  defined(__i386__)
    __asm__ __volatile__(
        " emms;\n"
        " pxor %%mm0,%%mm0;\n"
        " leal -32(%%esi,%%eax,2),%%edx;\n"         /* edx = top - 32 */

        " cmpl %%edx,%%esi;\n"
        " ja 1f;\n"

        /* Work in blocks of 16 int16_t's until we are near the end */
        " .p2align 2;\n"
        "2:\n"
        " movq (%%edi),%%mm1;\n"
        " movq (%%esi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        " movq 8(%%edi),%%mm1;\n"
        " movq 8(%%esi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        " movq 16(%%edi),%%mm1;\n"
        " movq 16(%%esi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        " movq 24(%%edi),%%mm1;\n"
        " movq 24(%%esi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " addl $32,%%esi;\n"
        " addl $32,%%edi;\n"
        " cmpl %%edx,%%esi;\n"
        " jbe 2b;\n"

        " .p2align 2;\n"
        "1:\n"
        " addl $24,%%edx;\n"                        /* Now edx = top - 8 */
        " cmpl %%edx,%%esi;\n"
        " ja 3f;\n"

        /* Work in blocks of 4 int16_t's until we are near the end */
        " .p2align 2;\n"
        "4:\n"
        " movq (%%edi),%%mm1;\n"
        " movq (%%esi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " addl $8,%%esi;\n"
        " addl $8,%%edi;\n"
        " cmpl %%edx,%%esi;\n"
        " jbe 4b;\n"

        " .p2align 2;\n"
        "3:\n"
        " addl $4,%%edx;\n"                         /* Now edx = top - 4 */
        " cmpl %%edx,%%esi;\n"
        " ja 5f;\n"

        /* Work in a block of 2 int16_t's */
        " movd (%%edi),%%mm1;\n"
        " movd (%%esi),%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " addl $4,%%esi;\n"
        " addl $4,%%edi;\n"

        " .p2align 2;\n"
        "5:\n"
        " addl $2,%%edx;\n"                         /* Now edx = top - 2 */
        " cmpl %%edx,%%esi;\n"
        " ja 6f;\n"

        /* Deal with the very last int16_t, when n is odd */
        " movswl (%%edi),%%eax;\n"
        " andl $65535,%%eax;\n"
        " movd %%eax,%%mm1;\n"
        " movswl (%%esi),%%eax;\n"
        " andl $65535,%%eax;\n"
        " movd %%eax,%%mm2;\n"
        " pmaddwd %%mm2,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"

        " .p2align 2;\n"
        "6:\n"
        /* Merge the pieces of the answer */
        " movq %%mm0,%%mm1;\n"
        " punpckhdq %%mm0,%%mm1;\n"
        " paddd %%mm1,%%mm0;\n"
        /* Et voila, eax has the final result */
        " movd %%mm0,%%eax;\n"

        " emms;\n"
        /* esi and edi are advanced by the code, so they must be read/write
           operands. */
        : "=a" (z), "+S" (x), "+D" (y)
        : "a" (n)
        : "edx", "mm0", "mm1", "mm2", "cc", "memory"
    );
#else
    uint32_t acc;
    int i;

    /* Each product of two int16_t values fits in an int32_t. The running sum
       is kept unsigned, so an overflowing sum wraps (as paddd does in the MMX
       code) instead of being undefined behaviour. */
    acc = 0;
    for (i = 0;  i < n;  i++)
        acc += (uint32_t) ((int32_t) x[i]*(int32_t) y[i]);
    /*endfor*/
    z = (int32_t) acc;
#endif
    return z;
}
261 /*- End of function --------------------------------------------------------*/
262
/*
 * Dot product of a circular buffer x with a linear vector y.
 *
 * The n elements of x are taken starting at index pos and wrapping round to
 * index 0: x[pos..n-1] is multiplied against y[0..n-pos-1], and x[0..pos-1]
 * against y[n-pos..n-1]. The two partial dot products are summed.
 */
SPAN_DECLARE(int32_t) vec_circular_dot_prodi16(const int16_t x[], const int16_t y[], int n, int pos)
{
    const int16_t *x_from_pos = x + pos;
    const int16_t *y_after_wrap = y + (n - pos);
    int32_t sum;

    /* From the current position to the end of the circular buffer */
    sum = vec_dot_prodi16(x_from_pos, y, n - pos);
    /* From the start of the circular buffer up to the current position */
    sum += vec_dot_prodi16(x, y_after_wrap, pos);
    return sum;
}
271 /*- End of function --------------------------------------------------------*/
272
/*
 * LMS style update of a vector of int16_t values.
 *
 * For each of the n elements, x[i]*error is computed in 32 bits, shifted
 * right by 15 bits, narrowed to int16_t, and added to y[i] in place.
 */
SPAN_DECLARE(void) vec_lmsi16(const int16_t x[], int16_t y[], int n, int16_t error)
{
    const int32_t err = (int32_t) error;
    int32_t scaled;
    int k;

    k = 0;
    while (k < n)
    {
        scaled = ((int32_t) x[k]*err) >> 15;
        y[k] += (int16_t) scaled;
        k++;
    }
    /*endwhile*/
}
280 /*- End of function --------------------------------------------------------*/
281
/*
 * LMS style update of a linear vector y from a circular buffer x.
 *
 * x is read starting at index pos and wrapping round to index 0:
 * x[pos..n-1] updates y[0..n-pos-1], and x[0..pos-1] updates y[n-pos..n-1].
 * Both updates are performed by vec_lmsi16() with the same error value.
 */
SPAN_DECLARE(void) vec_circular_lmsi16(const int16_t x[], int16_t y[], int n, int pos, int16_t error)
{
    const int tail_len = n - pos;

    /* From the current position to the end of the circular buffer */
    vec_lmsi16(x + pos, y, tail_len, error);
    /* From the start of the circular buffer up to the current position */
    vec_lmsi16(x, y + tail_len, pos, error);
}
287 /*- End of function --------------------------------------------------------*/
288
/*
 * Find the minimum and maximum values in a vector of int16_t.
 *
 * x    The vector to be searched; n elements are read.
 * n    The number of elements in x. This should be at least 1. With no input
 *      the MMX and portable paths do not return the same value.
 * out  If not NULL, out[0] is set to the maximum and out[1] to the minimum.
 *
 * Returns the largest magnitude found, i.e. the larger of the maximum and the
 * negated minimum, as a 32 bit value (so -32768 yields 32768).
 *
 * When built with GCC, SPANDSP_USE_MMX and an x86 target, a hand written MMX
 * routine is used. It processes 4 elements at a time, then 2, then 1. The
 * output pointer is passed in rdi/edi and rdx/edx is a declared scratch
 * register, so the asm never touches the stack (a push inside inline asm can
 * overwrite the x86-64 red zone). The asm advances the register holding x and
 * stores through out, which is declared to the compiler in the operand and
 * clobber lists.
 */
SPAN_DECLARE(int32_t) vec_min_maxi16(const int16_t x[], int n, int16_t out[])
{
#if defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)  &&  defined(__x86_64__)
    /* Starting values for the max's and min's, used when there are fewer than
       4 elements of input. */
    static const int32_t lower_bound = 0x80008000;
    static const int32_t upper_bound = 0x7FFF7FFF;
    int32_t max;

    __asm__ __volatile__(
        " emms;\n"
        " leaq -8(%%rsi,%%rax,2),%%rdx;\n"          /* rdx = top - 8 */

        " cmpq %%rdx,%%rsi;\n"
        " jbe 2f;\n"
        " movd %[lower],%%mm0;\n"
        " movd %[upper],%%mm1;\n"
        " jmp 1f;\n"

        " .p2align 2;\n"
        "2:\n"
        " movq (%%rsi),%%mm0;\n"                    /* mm0 will be max's */
        " movq %%mm0,%%mm1;\n"                      /* mm1 will be min's */
        " addq $8,%%rsi;\n"
        " cmpq %%rdx,%%rsi;\n"
        " ja 4f;\n"

        "3:\n"
        " movq (%%rsi),%%mm2;\n"

        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " movq %%mm3,%%mm4;\n"
        " pand %%mm2,%%mm3;\n"                      /* mm3 is mm2 masked to new max's */
        " pandn %%mm0,%%mm4;\n"                     /* mm4 is mm0 masked to its max's */
        " por %%mm3,%%mm4;\n"
        " movq %%mm4,%%mm0;\n"                      /* Now mm0 is updated max's */

        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm1;\n"                      /* now mm1 is updated min's */

        " addq $8,%%rsi;\n"
        " cmpq %%rdx,%%rsi;\n"
        " jbe 3b;\n"

        " .p2align 2;\n"
        "4:\n"
        /* Merge down the 4-word max/mins to lower 2 words */
        " movq %%mm0,%%mm2;\n"
        " psrlq $32,%%mm2;\n"
        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new max's */
        " pandn %%mm0,%%mm3;\n"                     /* mm3 is mm0 masked to its max's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm0;\n"                      /* now mm0 is updated max's */

        " movq %%mm1,%%mm2;\n"
        " psrlq $32,%%mm2;\n"
        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm1;\n"                      /* now mm1 is updated min's */

        " .p2align 2;\n"
        "1:\n"
        " addq $4,%%rdx;\n"                         /* now rdx = top - 4 */
        " cmpq %%rdx,%%rsi;\n"
        " ja 5f;\n"
        /* Here, there are >= 2 words of input remaining */
        " movd (%%rsi),%%mm2;\n"

        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " movq %%mm3,%%mm4;\n"
        " pand %%mm2,%%mm3;\n"                      /* mm3 is mm2 masked to new max's */
        " pandn %%mm0,%%mm4;\n"                     /* mm4 is mm0 masked to its max's */
        " por %%mm3,%%mm4;\n"
        " movq %%mm4,%%mm0;\n"                      /* now mm0 is updated max's */

        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm1;\n"                      /* now mm1 is updated min's */

        " addq $4,%%rsi;\n"

        " .p2align 2;\n"
        "5:\n"
        /* Merge down the 2-word max/mins to 1 word */
        " movq %%mm0,%%mm2;\n"
        " psrlq $16,%%mm2;\n"
        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new max's */
        " pandn %%mm0,%%mm3;\n"                     /* mm3 is mm0 masked to its max's */
        " por %%mm3,%%mm2;\n"
        " movd %%mm2,%%ecx;\n"                      /* cx is max so far */

        " movq %%mm1,%%mm2;\n"
        " psrlq $16,%%mm2;\n"
        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movd %%mm2,%%eax;\n"                      /* ax is min so far */

        " addq $2,%%rdx;\n"                         /* now rdx = top - 2 */
        " cmpq %%rdx,%%rsi;\n"
        " ja 6f;\n"

        /* Here, there is one word of input left */
        " cmpw (%%rsi),%%cx;\n"
        " jge 9f;\n"
        " movw (%%rsi),%%cx;\n"
        " .p2align 2;\n"
        "9:\n"
        " cmpw (%%rsi),%%ax;\n"
        " jle 6f;\n"
        " movw (%%rsi),%%ax;\n"

        " .p2align 2;\n"
        "6:\n"
        /* (finally!) cx is the max, ax the min */
        " movswl %%cx,%%ecx;\n"
        " movswl %%ax,%%eax;\n"

        " testq %%rdi,%%rdi;\n"                     /* rdi is the ptr to output max,min vals */
        " jz 7f;\n"
        " movw %%cx,(%%rdi);\n"                     /* max */
        " movw %%ax,2(%%rdi);\n"                    /* min */
        " .p2align 2;\n"
        "7:\n"
        /* Now calculate max absolute value */
        " negl %%eax;\n"
        " cmpl %%ecx,%%eax;\n"
        " jge 8f;\n"
        " movl %%ecx,%%eax;\n"
        " .p2align 2;\n"
        "8:\n"
        " emms;\n"
        /* rsi is advanced by the code, so it must be a read/write operand.
           n is widened, as the leaq uses all 64 bits of rax. */
        : "=a" (max), "+S" (x)
        : "a" ((int64_t) n), "D" (out), [lower] "m" (lower_bound), [upper] "m" (upper_bound)
        : "rcx", "rdx", "mm0", "mm1", "mm2", "mm3", "mm4", "cc", "memory"
    );
#elif defined(__GNUC__)  &&  defined(SPANDSP_USE_MMX)  &&  defined(__i386__)
    /* Starting values for the max's and min's, used when there are fewer than
       4 elements of input. */
    static const int32_t lower_bound = 0x80008000;
    static const int32_t upper_bound = 0x7FFF7FFF;
    int32_t max;

    __asm__ __volatile__(
        " emms;\n"
        " leal -8(%%esi,%%eax,2),%%edx;\n"          /* edx = top - 8 */

        " cmpl %%edx,%%esi;\n"
        " jbe 2f;\n"
        " movd %[lower],%%mm0;\n"
        " movd %[upper],%%mm1;\n"
        " jmp 1f;\n"

        " .p2align 2;\n"
        "2:\n"
        " movq (%%esi),%%mm0;\n"                    /* mm0 will be max's */
        " movq %%mm0,%%mm1;\n"                      /* mm1 will be min's */
        " addl $8,%%esi;\n"
        " cmpl %%edx,%%esi;\n"
        " ja 4f;\n"

        " .p2align 2;\n"
        "3:\n"
        " movq (%%esi),%%mm2;\n"

        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " movq %%mm3,%%mm4;\n"
        " pand %%mm2,%%mm3;\n"                      /* mm3 is mm2 masked to new max's */
        " pandn %%mm0,%%mm4;\n"                     /* mm4 is mm0 masked to its max's */
        " por %%mm3,%%mm4;\n"
        " movq %%mm4,%%mm0;\n"                      /* Now mm0 is updated max's */

        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm1;\n"                      /* now mm1 is updated min's */

        " addl $8,%%esi;\n"
        " cmpl %%edx,%%esi;\n"
        " jbe 3b;\n"

        " .p2align 2;\n"
        "4:\n"
        /* Merge down the 4-word max/mins to lower 2 words */
        " movq %%mm0,%%mm2;\n"
        " psrlq $32,%%mm2;\n"
        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new max's */
        " pandn %%mm0,%%mm3;\n"                     /* mm3 is mm0 masked to its max's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm0;\n"                      /* now mm0 is updated max's */

        " movq %%mm1,%%mm2;\n"
        " psrlq $32,%%mm2;\n"
        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm1;\n"                      /* now mm1 is updated min's */

        " .p2align 2;\n"
        "1:\n"
        " addl $4,%%edx;\n"                         /* now edx = top - 4 */
        " cmpl %%edx,%%esi;\n"
        " ja 5f;\n"
        /* Here, there are >= 2 words of input remaining */
        " movd (%%esi),%%mm2;\n"

        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " movq %%mm3,%%mm4;\n"
        " pand %%mm2,%%mm3;\n"                      /* mm3 is mm2 masked to new max's */
        " pandn %%mm0,%%mm4;\n"                     /* mm4 is mm0 masked to its max's */
        " por %%mm3,%%mm4;\n"
        " movq %%mm4,%%mm0;\n"                      /* now mm0 is updated max's */

        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movq %%mm2,%%mm1;\n"                      /* now mm1 is updated min's */

        " addl $4,%%esi;\n"

        " .p2align 2;\n"
        "5:\n"
        /* Merge down the 2-word max/mins to 1 word */
        " movq %%mm0,%%mm2;\n"
        " psrlq $16,%%mm2;\n"
        " movq %%mm2,%%mm3;\n"
        " pcmpgtw %%mm0,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 > mm0 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new max's */
        " pandn %%mm0,%%mm3;\n"                     /* mm3 is mm0 masked to its max's */
        " por %%mm3,%%mm2;\n"
        " movd %%mm2,%%ecx;\n"                      /* cx is max so far */

        " movq %%mm1,%%mm2;\n"
        " psrlq $16,%%mm2;\n"
        " movq %%mm1,%%mm3;\n"
        " pcmpgtw %%mm2,%%mm3;\n"                   /* mm3 is bitmask for words where mm2 < mm1 */
        " pand %%mm3,%%mm2;\n"                      /* mm2 is mm2 masked to new min's */
        " pandn %%mm1,%%mm3;\n"                     /* mm3 is mm1 masked to its min's */
        " por %%mm3,%%mm2;\n"
        " movd %%mm2,%%eax;\n"                      /* ax is min so far */

        " addl $2,%%edx;\n"                         /* now edx = top - 2 */
        " cmpl %%edx,%%esi;\n"
        " ja 6f;\n"

        /* Here, there is one word of input left */
        " cmpw (%%esi),%%cx;\n"
        " jge 9f;\n"
        " movw (%%esi),%%cx;\n"
        " .p2align 2;\n"
        "9:\n"
        " cmpw (%%esi),%%ax;\n"
        " jle 6f;\n"
        " movw (%%esi),%%ax;\n"

        " .p2align 2;\n"
        "6:\n"
        /* (finally!) cx is the max, ax the min */
        " movswl %%cx,%%ecx;\n"
        " movswl %%ax,%%eax;\n"

        " testl %%edi,%%edi;\n"                     /* edi is the ptr to output max,min vals */
        " jz 7f;\n"
        " movw %%cx,(%%edi);\n"                     /* max */
        " movw %%ax,2(%%edi);\n"                    /* min */
        " .p2align 2;\n"
        "7:\n"
        /* Now calculate max absolute value */
        " negl %%eax;\n"
        " cmpl %%ecx,%%eax;\n"
        " jge 8f;\n"
        " movl %%ecx,%%eax;\n"
        " .p2align 2;\n"
        "8:\n"
        " emms;\n"
        /* esi is advanced by the code, so it must be a read/write operand. */
        : "=a" (max), "+S" (x)
        : "a" (n), "D" (out), [lower] "m" (lower_bound), [upper] "m" (upper_bound)
        : "ecx", "edx", "mm0", "mm1", "mm2", "mm3", "mm4", "cc", "memory"
    );
#else
    int i;
    int16_t min;
    int16_t max;
    int16_t temp;
    int32_t z;

    /* Start from the opposite extremes, so the first element replaces both */
    max = INT16_MIN;
    min = INT16_MAX;
    for (i = 0;  i < n;  i++)
    {
        temp = x[i];
        if (temp > max)
            max = temp;
        /*endif*/
        if (temp < min)
            min = temp;
        /*endif*/
    }
    /*endfor*/
    if (out)
    {
        out[0] = max;
        out[1] = min;
    }
    /*endif*/
    /* abs() works on int, so a minimum of -32768 gives 32768 here */
    z = abs(min);
    if (z > max)
        return z;
    /*endif*/
#endif
    return max;
}
627 /*- End of function --------------------------------------------------------*/
628 /*- End of file ------------------------------------------------------------*/

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.