Mercurial > hg > audiostuff
comparison spandsp-0.0.6pre17/src/complex_vector_float.c @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 15:50:58 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:c6c5a16ce2f2 | 4:26cd8f1ef0b1 |
---|---|
1 /* | |
2 * SpanDSP - a series of DSP components for telephony | |
3 * | |
4 * complex_vector_float.c - Floating complex vector arithmetic routines. | |
5 * | |
6 * Written by Steve Underwood <steveu@coppice.org> | |
7 * | |
8 * Copyright (C) 2006 Steve Underwood | |
9 * | |
10 * All rights reserved. | |
11 * | |
12 * This program is free software; you can redistribute it and/or modify | |
13 * it under the terms of the GNU Lesser General Public License version 2.1, | |
14 * as published by the Free Software Foundation. | |
15 * | |
16 * This program is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 * GNU Lesser General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU Lesser General Public | |
22 * License along with this program; if not, write to the Free Software | |
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24 * | |
25 * $Id: complex_vector_float.c,v 1.16 2009/07/12 09:23:09 steveu Exp $ | |
26 */ | |
27 | |
28 /*! \file */ | |
29 | |
30 #if defined(HAVE_CONFIG_H) | |
31 #include "config.h" | |
32 #endif | |
33 | |
34 #include <inttypes.h> | |
35 #include <stdlib.h> | |
36 #include <stdio.h> | |
37 #include <string.h> | |
38 #if defined(HAVE_TGMATH_H) | |
39 #include <tgmath.h> | |
40 #endif | |
41 #if defined(HAVE_MATH_H) | |
42 #include <math.h> | |
43 #endif | |
44 #include <assert.h> | |
45 | |
46 #include "floating_fudge.h" | |
47 #include "mmx_sse_decs.h" | |
48 | |
49 #include "spandsp/telephony.h" | |
50 #include "spandsp/logging.h" | |
51 #include "spandsp/complex.h" | |
52 #include "spandsp/vector_float.h" | |
53 #include "spandsp/complex_vector_float.h" | |
54 | |
55 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE3) | |
56 SPAN_DECLARE(void) cvec_mulf(complexf_t z[], const complexf_t x[], const complexf_t y[], int n) | |
57 { | |
58 int i; | |
59 __m128 n0; | |
60 __m128 n1; | |
61 __m128 n2; | |
62 __m128 n3; | |
63 | |
64 if ((i = n & ~1)) | |
65 { | |
66 i <<= 1; | |
67 for (i -= 4; i >= 0; i -= 4) | |
68 { | |
69 n3 = _mm_loadu_ps((float *) x + i); | |
70 n0 = _mm_moveldup_ps(n3); | |
71 n1 = _mm_loadu_ps((float *) y + i); | |
72 n0 = _mm_mul_ps(n0, n1); | |
73 n1 = _mm_shuffle_ps(n1, n1, 0xB1); | |
74 n2 = _mm_movehdup_ps(n3); | |
75 n2 = _mm_mul_ps(n2, n1); | |
76 n0 = _mm_addsub_ps(n0, n2); | |
77 _mm_storeu_ps((float *) z + i, n0); | |
78 } | |
79 } | |
80 /* Now deal with the last element, which doesn't fill an SSE2 register */ | |
81 switch (n & 1) | |
82 { | |
83 case 1: | |
84 z[n - 1].re = x[n - 1].re*y[n - 1].re - x[n - 1].im*y[n - 1].im; | |
85 z[n - 1].im = x[n - 1].re*y[n - 1].im + x[n - 1].im*y[n - 1].re; | |
86 } | |
87 } | |
88 #else | |
89 SPAN_DECLARE(void) cvec_mulf(complexf_t z[], const complexf_t x[], const complexf_t y[], int n) | |
90 { | |
91 int i; | |
92 | |
93 for (i = 0; i < n; i++) | |
94 { | |
95 z[i].re = x[i].re*y[i].re - x[i].im*y[i].im; | |
96 z[i].im = x[i].re*y[i].im + x[i].im*y[i].re; | |
97 } | |
98 } | |
99 #endif | |
100 /*- End of function --------------------------------------------------------*/ | |
101 | |
102 SPAN_DECLARE(void) cvec_mul(complex_t z[], const complex_t x[], const complex_t y[], int n) | |
103 { | |
104 int i; | |
105 | |
106 for (i = 0; i < n; i++) | |
107 { | |
108 z[i].re = x[i].re*y[i].re - x[i].im*y[i].im; | |
109 z[i].im = x[i].re*y[i].im + x[i].im*y[i].re; | |
110 } | |
111 } | |
112 /*- End of function --------------------------------------------------------*/ | |
113 | |
114 #if defined(HAVE_LONG_DOUBLE) | |
115 SPAN_DECLARE(void) cvec_mull(complexl_t z[], const complexl_t x[], const complexl_t y[], int n) | |
116 { | |
117 int i; | |
118 | |
119 for (i = 0; i < n; i++) | |
120 { | |
121 z[i].re = x[i].re*y[i].re - x[i].im*y[i].im; | |
122 z[i].im = x[i].re*y[i].im + x[i].im*y[i].re; | |
123 } | |
124 } | |
125 /*- End of function --------------------------------------------------------*/ | |
126 #endif | |
127 | |
128 SPAN_DECLARE(complexf_t) cvec_dot_prodf(const complexf_t x[], const complexf_t y[], int n) | |
129 { | |
130 int i; | |
131 complexf_t z; | |
132 | |
133 z = complex_setf(0.0f, 0.0f); | |
134 for (i = 0; i < n; i++) | |
135 { | |
136 z.re += (x[i].re*y[i].re - x[i].im*y[i].im); | |
137 z.im += (x[i].re*y[i].im + x[i].im*y[i].re); | |
138 } | |
139 return z; | |
140 } | |
141 /*- End of function --------------------------------------------------------*/ | |
142 | |
143 SPAN_DECLARE(complex_t) cvec_dot_prod(const complex_t x[], const complex_t y[], int n) | |
144 { | |
145 int i; | |
146 complex_t z; | |
147 | |
148 z = complex_set(0.0, 0.0); | |
149 for (i = 0; i < n; i++) | |
150 { | |
151 z.re += (x[i].re*y[i].re - x[i].im*y[i].im); | |
152 z.im += (x[i].re*y[i].im + x[i].im*y[i].re); | |
153 } | |
154 return z; | |
155 } | |
156 /*- End of function --------------------------------------------------------*/ | |
157 | |
158 #if defined(HAVE_LONG_DOUBLE) | |
159 SPAN_DECLARE(complexl_t) cvec_dot_prodl(const complexl_t x[], const complexl_t y[], int n) | |
160 { | |
161 int i; | |
162 complexl_t z; | |
163 | |
164 z = complex_setl(0.0L, 0.0L); | |
165 for (i = 0; i < n; i++) | |
166 { | |
167 z.re += (x[i].re*y[i].re - x[i].im*y[i].im); | |
168 z.im += (x[i].re*y[i].im + x[i].im*y[i].re); | |
169 } | |
170 return z; | |
171 } | |
172 /*- End of function --------------------------------------------------------*/ | |
173 #endif | |
174 | |
175 SPAN_DECLARE(complexf_t) cvec_circular_dot_prodf(const complexf_t x[], const complexf_t y[], int n, int pos) | |
176 { | |
177 complexf_t z; | |
178 complexf_t z1; | |
179 | |
180 z = cvec_dot_prodf(&x[pos], &y[0], n - pos); | |
181 z1 = cvec_dot_prodf(&x[0], &y[n - pos], pos); | |
182 z = complex_addf(&z, &z1); | |
183 return z; | |
184 } | |
185 /*- End of function --------------------------------------------------------*/ | |
186 | |
187 #define LMS_LEAK_RATE 0.9999f | |
188 | |
189 SPAN_DECLARE(void) cvec_lmsf(const complexf_t x[], complexf_t y[], int n, const complexf_t *error) | |
190 { | |
191 int i; | |
192 | |
193 for (i = 0; i < n; i++) | |
194 { | |
195 /* Leak a little to tame uncontrolled wandering */ | |
196 y[i].re = y[i].re*LMS_LEAK_RATE + (x[i].im*error->im + x[i].re*error->re); | |
197 y[i].im = y[i].im*LMS_LEAK_RATE + (x[i].re*error->im - x[i].im*error->re); | |
198 } | |
199 } | |
200 /*- End of function --------------------------------------------------------*/ | |
201 | |
202 SPAN_DECLARE(void) cvec_circular_lmsf(const complexf_t x[], complexf_t y[], int n, int pos, const complexf_t *error) | |
203 { | |
204 cvec_lmsf(&x[pos], &y[0], n - pos, error); | |
205 cvec_lmsf(&x[0], &y[n - pos], pos, error); | |
206 } | |
207 /*- End of function --------------------------------------------------------*/ | |
208 /*- End of file ------------------------------------------------------------*/ |