Mercurial > hg > audiostuff
annotate spandsp-0.0.6pre17/src/vector_int.c @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 15:50:58 +0200 |
parents | |
children |
rev | line source |
---|---|
4
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
1 /* |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
2 * SpanDSP - a series of DSP components for telephony |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
3 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
4 * vector_int.c - Integer vector arithmetic |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
5 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
6 * Written by Steve Underwood <steveu@coppice.org> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
7 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
8 * Copyright (C) 2006 Steve Underwood |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
9 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
10 * All rights reserved. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
11 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
12 * This program is free software; you can redistribute it and/or modify |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
13 * it under the terms of the GNU Lesser General Public License version 2.1, |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
14 * as published by the Free Software Foundation. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
15 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
16 * This program is distributed in the hope that it will be useful, |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
19 * GNU Lesser General Public License for more details. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
20 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
21 * You should have received a copy of the GNU Lesser General Public |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
22 * License along with this program; if not, write to the Free Software |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
24 * |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
25 * $Id: vector_int.c,v 1.26.4.1 2009/12/28 11:54:59 steveu Exp $ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
26 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
27 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
28 /*! \file */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
29 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
30 #if defined(HAVE_CONFIG_H) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
31 #include "config.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
32 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
33 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
34 #include <inttypes.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
35 #include <stdlib.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
36 #include <stdio.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
37 #include <string.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
38 #if defined(HAVE_TGMATH_H) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
39 #include <tgmath.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
40 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
41 #if defined(HAVE_MATH_H) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
42 #include <math.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
43 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
44 #include <assert.h> |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
45 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
46 #include "floating_fudge.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
47 #include "mmx_sse_decs.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
48 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
49 #include "spandsp/telephony.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
50 #include "spandsp/vector_int.h" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
51 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
52 SPAN_DECLARE(int32_t) vec_dot_prodi16(const int16_t x[], const int16_t y[], int n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
53 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
54 int32_t z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
55 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
56 #if defined(__GNUC__) && defined(SPANDSP_USE_MMX) && defined(__x86_64__) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
57 __asm__ __volatile__( |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
58 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
59 " pxor %%mm0,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
60 " leaq -32(%%rsi,%%rax,2),%%rdx;\n" /* rdx = top - 32 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
61 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
62 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
63 " ja 1f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
64 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
65 /* Work in blocks of 16 int16_t's until we are near the end */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
66 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
67 "2:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
68 " movq (%%rdi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
69 " movq (%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
70 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
71 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
72 " movq 8(%%rdi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
73 " movq 8(%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
74 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
75 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
76 " movq 16(%%rdi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
77 " movq 16(%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
78 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
79 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
80 " movq 24(%%rdi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
81 " movq 24(%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
82 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
83 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
84 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
85 " addq $32,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
86 " addq $32,%%rdi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
87 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
88 " jbe 2b;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
89 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
90 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
91 "1:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
92 " addq $24,%%rdx;\n" /* Now edx = top - 8 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
93 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
94 " ja 3f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
95 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
96 /* Work in blocks of 4 int16_t's until we are near the end */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
97 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
98 "4:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
99 " movq (%%rdi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
100 " movq (%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
101 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
102 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
103 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
104 " addq $8,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
105 " addq $8,%%rdi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
106 " cmpq %%rdx,%%rsi;" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
107 " jbe 4b;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
108 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
109 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
110 "3:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
111 " addq $4,%%rdx;\n" /* Now edx = top - 4 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
112 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
113 " ja 5f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
114 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
115 /* Work in a block of 2 int16_t's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
116 " movd (%%rdi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
117 " movd (%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
118 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
119 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
120 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
121 " addq $4,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
122 " addq $4,%%rdi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
123 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
124 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
125 "5:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
126 " addq $2,%%rdx;\n" /* Now edx = top - 2 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
127 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
128 " ja 6f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
129 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
130 /* Deal with the very last int16_t, when n is odd */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
131 " movswl (%%rdi),%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
132 " andl $65535,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
133 " movd %%eax,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
134 " movswl (%%rsi),%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
135 " andl $65535,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
136 " movd %%eax,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
137 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
138 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
139 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
140 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
141 "6:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
142 /* Merge the pieces of the answer */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
143 " movq %%mm0,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
144 " punpckhdq %%mm0,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
145 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
146 /* Et voila, eax has the final result */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
147 " movd %%mm0,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
148 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
149 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
150 : "=a" (z) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
151 : "S" (x), "D" (y), "a" (n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
152 : "cc" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
153 ); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
154 #elif defined(__GNUC__) && defined(SPANDSP_USE_MMX) && defined(__i386__) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
155 __asm__ __volatile__( |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
156 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
157 " pxor %%mm0,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
158 " leal -32(%%esi,%%eax,2),%%edx;\n" /* edx = top - 32 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
159 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
160 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
161 " ja 1f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
162 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
163 /* Work in blocks of 16 int16_t's until we are near the end */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
164 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
165 "2:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
166 " movq (%%edi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
167 " movq (%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
168 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
169 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
170 " movq 8(%%edi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
171 " movq 8(%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
172 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
173 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
174 " movq 16(%%edi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
175 " movq 16(%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
176 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
177 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
178 " movq 24(%%edi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
179 " movq 24(%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
180 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
181 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
182 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
183 " addl $32,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
184 " addl $32,%%edi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
185 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
186 " jbe 2b;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
187 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
188 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
189 "1:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
190 " addl $24,%%edx;\n" /* Now edx = top - 8 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
191 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
192 " ja 3f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
193 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
194 /* Work in blocks of 4 int16_t's until we are near the end */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
195 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
196 "4:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
197 " movq (%%edi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
198 " movq (%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
199 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
200 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
201 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
202 " addl $8,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
203 " addl $8,%%edi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
204 " cmpl %%edx,%%esi;" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
205 " jbe 4b;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
206 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
207 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
208 "3:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
209 " addl $4,%%edx;\n" /* Now edx = top - 4 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
210 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
211 " ja 5f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
212 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
213 /* Work in a block of 2 int16_t's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
214 " movd (%%edi),%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
215 " movd (%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
216 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
217 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
218 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
219 " addl $4,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
220 " addl $4,%%edi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
221 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
222 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
223 "5:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
224 " addl $2,%%edx;\n" /* Now edx = top - 2 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
225 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
226 " ja 6f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
227 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
228 /* Deal with the very last int16_t, when n is odd */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
229 " movswl (%%edi),%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
230 " andl $65535,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
231 " movd %%eax,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
232 " movswl (%%esi),%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
233 " andl $65535,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
234 " movd %%eax,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
235 " pmaddwd %%mm2,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
236 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
237 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
238 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
239 "6:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
240 /* Merge the pieces of the answer */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
241 " movq %%mm0,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
242 " punpckhdq %%mm0,%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
243 " paddd %%mm1,%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
244 /* Et voila, eax has the final result */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
245 " movd %%mm0,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
246 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
247 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
248 : "=a" (z) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
249 : "S" (x), "D" (y), "a" (n) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
250 : "cc" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
251 ); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
252 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
253 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
254 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
255 z = 0; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
256 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
257 z += (int32_t) x[i]*(int32_t) y[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
258 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
259 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
260 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
261 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
262 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
263 SPAN_DECLARE(int32_t) vec_circular_dot_prodi16(const int16_t x[], const int16_t y[], int n, int pos) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
264 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
265 int32_t z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
266 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
267 z = vec_dot_prodi16(&x[pos], &y[0], n - pos); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
268 z += vec_dot_prodi16(&x[0], &y[n - pos], pos); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
269 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
270 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
271 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
272 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
273 SPAN_DECLARE(void) vec_lmsi16(const int16_t x[], int16_t y[], int n, int16_t error) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
274 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
275 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
276 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
277 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
278 y[i] += (int16_t) (((int32_t) x[i]*(int32_t) error) >> 15); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
279 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
280 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
281 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
282 SPAN_DECLARE(void) vec_circular_lmsi16(const int16_t x[], int16_t y[], int n, int pos, int16_t error) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
283 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
284 vec_lmsi16(&x[pos], &y[0], n - pos, error); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
285 vec_lmsi16(&x[0], &y[n - pos], pos, error); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
286 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
287 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
288 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
289 SPAN_DECLARE(int32_t) vec_min_maxi16(const int16_t x[], int n, int16_t out[]) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
290 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
291 #if defined(__GNUC__) && defined(SPANDSP_USE_MMX) && defined(__x86_64__) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
292 static const int32_t lower_bound = 0x80008000; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
293 static const int32_t upper_bound = 0x7FFF7FFF; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
294 int32_t max; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
295 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
296 __asm__ __volatile__( |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
297 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
298 " pushq %%rdx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
299 " leaq -8(%%rsi,%%rax,2),%%rdx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
300 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
301 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
302 " jbe 2f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
303 " movd %[lower],%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
304 " movd %[upper],%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
305 " jmp 1f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
306 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
307 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
308 "2:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
309 " movq (%%rsi),%%mm0;\n" /* mm0 will be max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
310 " movq %%mm0,%%mm1;\n" /* mm1 will be min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
311 " addq $8,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
312 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
313 " ja 4f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
314 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
315 "3:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
316 " movq (%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
317 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
318 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
319 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
320 " movq %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
321 " pand %%mm2,%%mm3;\n" /* mm3 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
322 " pandn %%mm0,%%mm4;\n" /* mm4 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
323 " por %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
324 " movq %%mm4,%%mm0;\n" /* Now mm0 is updated max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
325 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
326 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
327 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
328 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
329 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
330 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
331 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
332 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
333 " addq $8,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
334 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
335 " jbe 3b;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
336 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
337 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
338 "4:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
339 /* Merge down the 4-word max/mins to lower 2 words */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
340 " movq %%mm0,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
341 " psrlq $32,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
342 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
343 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
344 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
345 " pandn %%mm0,%%mm3;\n" /* mm3 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
346 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
347 " movq %%mm2,%%mm0;\n" /* now mm0 is updated max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
348 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
349 " movq %%mm1,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
350 " psrlq $32,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
351 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
352 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
353 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
354 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
355 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
356 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
357 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
358 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
359 "1:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
360 " addq $4,%%rdx;\n" /* now dx = top-4 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
361 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
362 " ja 5f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
363 /* Here, there are >= 2 words of input remaining */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
364 " movd (%%rsi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
365 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
366 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
367 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
368 " movq %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
369 " pand %%mm2,%%mm3;\n" /* mm3 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
370 " pandn %%mm0,%%mm4;\n" /* mm4 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
371 " por %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
372 " movq %%mm4,%%mm0;\n" /* now mm0 is updated max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
373 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
374 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
375 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
376 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
377 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
378 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
379 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
380 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
381 " addq $4,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
382 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
383 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
384 "5:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
385 /* Merge down the 2-word max/mins to 1 word */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
386 " movq %%mm0,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
387 " psrlq $16,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
388 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
389 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
390 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
391 " pandn %%mm0,%%mm3;\n" /* mm3 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
392 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
393 " movd %%mm2,%%ecx;\n" /* cx is max so far */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
394 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
395 " movq %%mm1,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
396 " psrlq $16,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
397 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
398 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
399 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
400 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
401 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
402 " movd %%mm2,%%eax;\n" /* ax is min so far */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
403 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
404 " addq $2,%%rdx;\n" /* now dx = top-2 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
405 " cmpq %%rdx,%%rsi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
406 " ja 6f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
407 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
408 /* Here, there is one word of input left */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
409 " cmpw (%%rsi),%%cx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
410 " jge 9f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
411 " movw (%%rsi),%%cx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
412 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
413 "9:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
414 " cmpw (%%rsi),%%ax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
415 " jle 6f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
416 " movw (%%rsi),%%ax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
417 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
418 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
419 "6:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
420 /* (finally!) cx is the max, ax the min */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
421 " movswl %%cx,%%ecx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
422 " movswl %%ax,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
423 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
424 " popq %%rdx;\n" /* ptr to output max,min vals */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
425 " andq %%rdx,%%rdx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
426 " jz 7f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
427 " movw %%cx,(%%rdx);\n" /* max */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
428 " movw %%ax,2(%%rdx);\n" /* min */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
429 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
430 "7:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
431 /* Now calculate max absolute value */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
432 " negl %%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
433 " cmpl %%ecx,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
434 " jge 8f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
435 " movl %%ecx,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
436 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
437 "8:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
438 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
439 : "=a" (max) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
440 : "S" (x), "a" (n), "d" (out), [lower] "m" (lower_bound), [upper] "m" (upper_bound) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
441 : "ecx" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
442 ); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
443 #elif defined(__GNUC__) && defined(SPANDSP_USE_MMX) && defined(__i386__) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
444 static const int32_t lower_bound = 0x80008000; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
445 static const int32_t upper_bound = 0x7FFF7FFF; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
446 int32_t max; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
447 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
448 __asm__ __volatile__( |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
449 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
450 " pushl %%edx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
451 " leal -8(%%esi,%%eax,2),%%edx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
452 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
453 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
454 " jbe 2f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
455 " movd %[lower],%%mm0;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
456 " movd %[upper],%%mm1;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
457 " jmp 1f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
458 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
459 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
460 "2:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
461 " movq (%%esi),%%mm0;\n" /* mm0 will be max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
462 " movq %%mm0,%%mm1;\n" /* mm1 will be min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
463 " addl $8,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
464 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
465 " ja 4f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
466 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
467 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
468 "3:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
469 " movq (%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
470 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
471 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
472 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
473 " movq %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
474 " pand %%mm2,%%mm3;\n" /* mm3 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
475 " pandn %%mm0,%%mm4;\n" /* mm4 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
476 " por %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
477 " movq %%mm4,%%mm0;\n" /* Now mm0 is updated max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
478 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
479 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
480 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
481 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
482 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
483 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
484 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
485 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
486 " addl $8,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
487 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
488 " jbe 3b;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
489 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
490 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
491 "4:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
492 /* Merge down the 4-word max/mins to lower 2 words */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
493 " movq %%mm0,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
494 " psrlq $32,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
495 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
496 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
497 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
498 " pandn %%mm0,%%mm3;\n" /* mm3 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
499 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
500 " movq %%mm2,%%mm0;\n" /* now mm0 is updated max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
501 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
502 " movq %%mm1,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
503 " psrlq $32,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
504 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
505 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
506 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
507 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
508 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
509 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
510 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
511 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
512 "1:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
513 " addl $4,%%edx;\n" /* now dx = top-4 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
514 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
515 " ja 5f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
516 /* Here, there are >= 2 words of input remaining */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
517 " movd (%%esi),%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
518 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
519 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
520 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
521 " movq %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
522 " pand %%mm2,%%mm3;\n" /* mm3 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
523 " pandn %%mm0,%%mm4;\n" /* mm4 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
524 " por %%mm3,%%mm4;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
525 " movq %%mm4,%%mm0;\n" /* now mm0 is updated max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
526 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
527 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
528 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
529 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
530 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
531 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
532 " movq %%mm2,%%mm1;\n" /* now mm1 is updated min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
533 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
534 " addl $4,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
535 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
536 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
537 "5:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
538 /* Merge down the 2-word max/mins to 1 word */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
539 " movq %%mm0,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
540 " psrlq $16,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
541 " movq %%mm2,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
542 " pcmpgtw %%mm0,%%mm3;\n" /* mm3 is bitmask for words where mm2 > mm0 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
543 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
544 " pandn %%mm0,%%mm3;\n" /* mm3 is mm0 masked to its max's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
545 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
546 " movd %%mm2,%%ecx;\n" /* cx is max so far */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
547 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
548 " movq %%mm1,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
549 " psrlq $16,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
550 " movq %%mm1,%%mm3;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
551 " pcmpgtw %%mm2,%%mm3;\n" /* mm3 is bitmask for words where mm2 < mm1 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
552 " pand %%mm3,%%mm2;\n" /* mm2 is mm2 masked to new min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
553 " pandn %%mm1,%%mm3;\n" /* mm3 is mm1 masked to its min's */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
554 " por %%mm3,%%mm2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
555 " movd %%mm2,%%eax;\n" /* ax is min so far */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
556 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
557 " addl $2,%%edx;\n" /* now dx = top-2 */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
558 " cmpl %%edx,%%esi;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
559 " ja 6f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
560 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
561 /* Here, there is one word of input left */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
562 " cmpw (%%esi),%%cx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
563 " jge 9f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
564 " movw (%%esi),%%cx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
565 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
566 "9:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
567 " cmpw (%%esi),%%ax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
568 " jle 6f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
569 " movw (%%esi),%%ax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
570 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
571 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
572 "6:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
573 /* (finally!) cx is the max, ax the min */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
574 " movswl %%cx,%%ecx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
575 " movswl %%ax,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
576 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
577 " popl %%edx;\n" /* ptr to output max,min vals */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
578 " andl %%edx,%%edx;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
579 " jz 7f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
580 " movw %%cx,(%%edx);\n" /* max */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
581 " movw %%ax,2(%%edx);\n" /* min */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
582 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
583 "7:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
584 /* Now calculate max absolute value */ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
585 " negl %%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
586 " cmpl %%ecx,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
587 " jge 8f;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
588 " movl %%ecx,%%eax;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
589 " .p2align 2;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
590 "8:\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
591 " emms;\n" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
592 : "=a" (max) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
593 : "S" (x), "a" (n), "d" (out), [lower] "m" (lower_bound), [upper] "m" (upper_bound) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
594 : "ecx" |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
595 ); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
596 #else |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
597 int i; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
598 int16_t min; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
599 int16_t max; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
600 int16_t temp; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
601 int32_t z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
602 |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
603 max = INT16_MIN; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
604 min = INT16_MAX; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
605 for (i = 0; i < n; i++) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
606 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
607 temp = x[i]; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
608 if (temp > max) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
609 max = temp; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
610 /*endif*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
611 if (temp < min) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
612 min = temp; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
613 /*endif*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
614 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
615 /*endfor*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
616 if (out) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
617 { |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
618 out[0] = max; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
619 out[1] = min; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
620 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
621 z = abs(min); |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
622 if (z > max) |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
623 return z; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
624 #endif |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
625 return max; |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
626 } |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
627 /*- End of function --------------------------------------------------------*/ |
26cd8f1ef0b1
import spandsp-0.0.6pre17
Peter Meerwald <pmeerw@cosy.sbg.ac.at>
parents:
diff
changeset
|
628 /*- End of file ------------------------------------------------------------*/ |