comparison spandsp-0.0.6pre17/src/vector_float.c @ 4:26cd8f1ef0b1

import spandsp-0.0.6pre17
author Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date Fri, 25 Jun 2010 15:50:58 +0200
parents
children
comparison
equal deleted inserted replaced
3:c6c5a16ce2f2 4:26cd8f1ef0b1
1 /*
2 * SpanDSP - a series of DSP components for telephony
3 *
4 * vector_float.c - Floating vector arithmetic routines.
5 *
6 * Written by Steve Underwood <steveu@coppice.org>
7 *
8 * Copyright (C) 2006 Steve Underwood
9 *
10 * All rights reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 2.1,
14 * as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * $Id: vector_float.c,v 1.22 2009/07/12 09:23:09 steveu Exp $
26 */
27
28 /*! \file */
29
30 #if defined(HAVE_CONFIG_H)
31 #include "config.h"
32 #endif
33
34 #include <inttypes.h>
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #if defined(HAVE_TGMATH_H)
39 #include <tgmath.h>
40 #endif
41 #if defined(HAVE_MATH_H)
42 #include <math.h>
43 #endif
44 #include <assert.h>
45
46 #include "floating_fudge.h"
47 #include "mmx_sse_decs.h"
48
49 #include "spandsp/telephony.h"
50 #include "spandsp/vector_float.h"
51
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Copy the first n floats of x into z, 4 at a time with SSE.
   Unaligned loads/stores are used, so neither buffer needs alignment. */
SPAN_DECLARE(void) vec_copyf(float z[], const float x[], int n)
{
    int i;
    __m128 n1;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        /* Work backwards through the buffers, one 4-float chunk at a time */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            _mm_storeu_ps(z + i, n1);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1];
    }
}
#else
/* Copy the first n floats of x into z (scalar form). */
SPAN_DECLARE(void) vec_copyf(float z[], const float x[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i];
}
#endif
/*- End of function --------------------------------------------------------*/
87
/* Copy the first n doubles of x into z. */
SPAN_DECLARE(void) vec_copy(double z[], const double x[], int n)
{
    const double *s;
    double *d;

    /* March both pointers forwards together */
    for (s = x, d = z; n > 0; n--)
        *d++ = *s++;
}
/*- End of function --------------------------------------------------------*/
96
#if defined(HAVE_LONG_DOUBLE)
/* Copy the first n long doubles of x into z. */
SPAN_DECLARE(void) vec_copyl(long double z[], const long double x[], int n)
{
    const long double *s;
    long double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = *s++;
}
/*- End of function --------------------------------------------------------*/
#endif
107
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Negate the first n floats of x into z, 4 at a time with SSE. */
SPAN_DECLARE(void) vec_negatef(float z[], const float x[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        /* -0.0f has only the sign bit (0x80000000) set, so XORing with it
           flips the sign of each lane. This replaces the old type-punned
           read of a static uint32_t through a (float *), which violated
           strict aliasing (undefined behaviour). */
        n2 = _mm_set1_ps(-0.0f);
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n1 = _mm_xor_ps(n1, n2);
            _mm_storeu_ps(z + i, n1);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = -x[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = -x[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = -x[n - 1];
    }
}
#else
/* Negate the first n floats of x into z (scalar form). */
SPAN_DECLARE(void) vec_negatef(float z[], const float x[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = -x[i];
}
#endif
/*- End of function --------------------------------------------------------*/
148
/* Negate the first n doubles of x into z. */
SPAN_DECLARE(void) vec_negate(double z[], const double x[], int n)
{
    const double *s;
    double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = -*s++;
}
/*- End of function --------------------------------------------------------*/
157
#if defined(HAVE_LONG_DOUBLE)
/* Negate the first n long doubles of x into z. */
SPAN_DECLARE(void) vec_negatel(long double z[], const long double x[], int n)
{
    const long double *s;
    long double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = -*s++;
}
/*- End of function --------------------------------------------------------*/
#endif
168
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Set the first n floats of z to zero, 4 at a time with SSE. */
SPAN_DECLARE(void) vec_zerof(float z[], int n)
{
    int i;
    __m128 n1;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n1 = _mm_setzero_ps();
        /* Work backwards through the buffer, one 4-float chunk at a time */
        for (i -= 4; i >= 0; i -= 4)
            _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = 0;
        /* Fall through */
    case 2:
        z[n - 2] = 0;
        /* Fall through */
    case 1:
        z[n - 1] = 0;
    }
}
#else
/* Set the first n floats of z to zero (scalar form). */
SPAN_DECLARE(void) vec_zerof(float z[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = 0.0f;
}
#endif
/*- End of function --------------------------------------------------------*/
202
/* Set the first n doubles of z to zero. */
SPAN_DECLARE(void) vec_zero(double z[], int n)
{
    double *d;

    for (d = z; n > 0; n--)
        *d++ = 0.0;
}
/*- End of function --------------------------------------------------------*/
211
#if defined(HAVE_LONG_DOUBLE)
/* Set the first n long doubles of z to zero. */
SPAN_DECLARE(void) vec_zerol(long double z[], int n)
{
    long double *d;

    for (d = z; n > 0; n--)
        *d++ = 0.0L;
}
/*- End of function --------------------------------------------------------*/
#endif
222
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Set the first n floats of z to the value x, 4 at a time with SSE. */
SPAN_DECLARE(void) vec_setf(float z[], float x, int n)
{
    int i;
    __m128 n1;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n1 = _mm_set1_ps(x);    /* Broadcast x to all 4 lanes */
        for (i -= 4; i >= 0; i -= 4)
            _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x;
        /* Fall through */
    case 2:
        z[n - 2] = x;
        /* Fall through */
    case 1:
        z[n - 1] = x;
    }
}
#else
/* Set the first n floats of z to the value x (scalar form). */
SPAN_DECLARE(void) vec_setf(float z[], float x, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x;
}
#endif
/*- End of function --------------------------------------------------------*/
256
/* Set the first n doubles of z to the value x. */
SPAN_DECLARE(void) vec_set(double z[], double x, int n)
{
    double *d;

    for (d = z; n > 0; n--)
        *d++ = x;
}
/*- End of function --------------------------------------------------------*/
265
#if defined(HAVE_LONG_DOUBLE)
/* Set the first n long doubles of z to the value x. */
SPAN_DECLARE(void) vec_setl(long double z[], long double x, int n)
{
    long double *d;

    for (d = z; n > 0; n--)
        *d++ = x;
}
/*- End of function --------------------------------------------------------*/
#endif
276
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Element-wise sum, z[i] = x[i] + y[i], for the first n floats,
   4 at a time with SSE. */
SPAN_DECLARE(void) vec_addf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n2 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y[n - 1];
    }
}
#else
/* Element-wise sum, z[i] = x[i] + y[i], for the first n floats (scalar form). */
SPAN_DECLARE(void) vec_addf(float z[], const float x[], const float y[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] + y[i];
}
#endif
/*- End of function --------------------------------------------------------*/
315
/* Element-wise sum, z[i] = x[i] + y[i], for the first n doubles. */
SPAN_DECLARE(void) vec_add(double z[], const double x[], const double y[], int n)
{
    const double *p1;
    const double *p2;
    double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ + *p2++;
}
/*- End of function --------------------------------------------------------*/
324
#if defined(HAVE_LONG_DOUBLE)
/* Element-wise sum, z[i] = x[i] + y[i], for the first n long doubles. */
SPAN_DECLARE(void) vec_addl(long double z[], const long double x[], const long double y[], int n)
{
    const long double *p1;
    const long double *p2;
    long double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ + *p2++;
}
/*- End of function --------------------------------------------------------*/
#endif
335
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Scaled sum, z[i] = x[i]*x_scale + y[i]*y_scale, for the first n floats,
   4 at a time with SSE. */
SPAN_DECLARE(void) vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;
    __m128 n4;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n3 = _mm_set1_ps(x_scale);  /* Broadcast the two scale factors */
        n4 = _mm_set1_ps(y_scale);
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n1 = _mm_mul_ps(n1, n3);
            n2 = _mm_mul_ps(n2, n4);
            n2 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*x_scale + y[n - 3]*y_scale;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*x_scale + y[n - 2]*y_scale;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*x_scale + y[n - 1]*y_scale;
    }
}
#else
/* Scaled sum, z[i] = x[i]*x_scale + y[i]*y_scale, for the first n floats
   (scalar form). */
SPAN_DECLARE(void) vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i]*x_scale + y[i]*y_scale;
}
#endif
/*- End of function --------------------------------------------------------*/
380
/* Scaled sum, z[i] = x[i]*x_scale + y[i]*y_scale, for the first n doubles. */
SPAN_DECLARE(void) vec_scaledxy_add(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
{
    const double *p1;
    const double *p2;
    double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++*x_scale + *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
389
#if defined(HAVE_LONG_DOUBLE)
/* Scaled sum, z[i] = x[i]*x_scale + y[i]*y_scale, for the first n long doubles. */
SPAN_DECLARE(void) vec_scaledxy_addl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
{
    const long double *p1;
    const long double *p2;
    long double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++*x_scale + *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
#endif
400
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Scaled sum, z[i] = x[i] + y[i]*y_scale, for the first n floats,
   4 at a time with SSE. */
SPAN_DECLARE(void) vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n3 = _mm_set1_ps(y_scale);  /* Broadcast the scale factor */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n2 = _mm_mul_ps(n2, n3);
            n2 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y[n - 3]*y_scale;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y[n - 2]*y_scale;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y[n - 1]*y_scale;
    }
}
#else
/* Scaled sum, z[i] = x[i] + y[i]*y_scale, for the first n floats (scalar form). */
SPAN_DECLARE(void) vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] + y[i]*y_scale;
}
#endif
/*- End of function --------------------------------------------------------*/
442
/* Scaled sum, z[i] = x[i] + y[i]*y_scale, for the first n doubles. */
SPAN_DECLARE(void) vec_scaledy_add(double z[], const double x[], const double y[], double y_scale, int n)
{
    const double *p1;
    const double *p2;
    double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ + *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
451
#if defined(HAVE_LONG_DOUBLE)
/* Scaled sum, z[i] = x[i] + y[i]*y_scale, for the first n long doubles. */
SPAN_DECLARE(void) vec_scaledy_addl(long double z[], const long double x[], const long double y[], long double y_scale, int n)
{
    const long double *p1;
    const long double *p2;
    long double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ + *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
#endif
462
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Element-wise difference, z[i] = x[i] - y[i], for the first n floats,
   4 at a time with SSE. */
SPAN_DECLARE(void) vec_subf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n2 = _mm_sub_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] - y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] - y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] - y[n - 1];
    }
}
#else
/* Element-wise difference, z[i] = x[i] - y[i], for the first n floats
   (scalar form). */
SPAN_DECLARE(void) vec_subf(float z[], const float x[], const float y[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] - y[i];
}
#endif
/*- End of function --------------------------------------------------------*/
501
/* Element-wise difference, z[i] = x[i] - y[i], for the first n doubles. */
SPAN_DECLARE(void) vec_sub(double z[], const double x[], const double y[], int n)
{
    const double *p1;
    const double *p2;
    double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ - *p2++;
}
/*- End of function --------------------------------------------------------*/
510
#if defined(HAVE_LONG_DOUBLE)
/* Element-wise difference, z[i] = x[i] - y[i], for the first n long doubles. */
SPAN_DECLARE(void) vec_subl(long double z[], const long double x[], const long double y[], int n)
{
    const long double *p1;
    const long double *p2;
    long double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ - *p2++;
}
/*- End of function --------------------------------------------------------*/
#endif
521
/* Scaled difference, z[i] = x[i]*x_scale - y[i]*y_scale, for the first n floats. */
SPAN_DECLARE(void) vec_scaledxy_subf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    const float *p1;
    const float *p2;
    float *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++*x_scale - *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
530
/* Scaled difference, z[i] = x[i]*x_scale - y[i]*y_scale, for the first n doubles. */
SPAN_DECLARE(void) vec_scaledxy_sub(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
{
    const double *p1;
    const double *p2;
    double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++*x_scale - *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
539
#if defined(HAVE_LONG_DOUBLE)
/* Scaled difference, z[i] = x[i]*x_scale - y[i]*y_scale, for the first n
   long doubles. */
SPAN_DECLARE(void) vec_scaledxy_subl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
{
    const long double *p1;
    const long double *p2;
    long double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++*x_scale - *p2++*y_scale;
}
/*- End of function --------------------------------------------------------*/
#endif
550
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Scale the first n floats of x by the scalar y into z, 4 at a time with SSE. */
SPAN_DECLARE(void) vec_scalar_mulf(float z[], const float x[], float y, int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n2 = _mm_set1_ps(y);    /* Broadcast the scalar multiplier */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n1 = _mm_mul_ps(n1, n2);
            _mm_storeu_ps(z + i, n1);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*y;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*y;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*y;
    }
}
#else
/* Scale the first n floats of x by the scalar y into z (scalar form). */
SPAN_DECLARE(void) vec_scalar_mulf(float z[], const float x[], float y, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i]*y;
}
#endif
/*- End of function --------------------------------------------------------*/
589
/* Scale the first n doubles of x by the scalar y into z. */
SPAN_DECLARE(void) vec_scalar_mul(double z[], const double x[], double y, int n)
{
    const double *s;
    double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = *s++*y;
}
/*- End of function --------------------------------------------------------*/
598
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Add the scalar y to the first n floats of x, into z, 4 at a time with SSE. */
SPAN_DECLARE(void) vec_scalar_addf(float z[], const float x[], float y, int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n2 = _mm_set1_ps(y);    /* Broadcast the scalar addend */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n1 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n1);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y;
    }
}
#else
/* Add the scalar y to the first n floats of x, into z (scalar form). */
SPAN_DECLARE(void) vec_scalar_addf(float z[], const float x[], float y, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] + y;
}
#endif
/*- End of function --------------------------------------------------------*/
637
/* Add the scalar y to the first n doubles of x, into z. */
SPAN_DECLARE(void) vec_scalar_add(double z[], const double x[], double y, int n)
{
    const double *s;
    double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = *s++ + y;
}
/*- End of function --------------------------------------------------------*/
646
#if defined(HAVE_LONG_DOUBLE)
/* Add the scalar y to the first n long doubles of x, into z. */
SPAN_DECLARE(void) vec_scalar_addl(long double z[], const long double x[], long double y, int n)
{
    const long double *s;
    long double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = *s++ + y;
}
/*- End of function --------------------------------------------------------*/
#endif
657
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Subtract the scalar y from the first n floats of x, into z,
   4 at a time with SSE. */
SPAN_DECLARE(void) vec_scalar_subf(float z[], const float x[], float y, int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n2 = _mm_set1_ps(y);    /* Broadcast the scalar subtrahend */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n1 = _mm_sub_ps(n1, n2);
            _mm_storeu_ps(z + i, n1);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] - y;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] - y;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] - y;
    }
}
#else
/* Subtract the scalar y from the first n floats of x, into z (scalar form). */
SPAN_DECLARE(void) vec_scalar_subf(float z[], const float x[], float y, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] - y;
}
#endif
/*- End of function --------------------------------------------------------*/
696
/* Subtract the scalar y from the first n doubles of x, into z. */
SPAN_DECLARE(void) vec_scalar_sub(double z[], const double x[], double y, int n)
{
    const double *s;
    double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = *s++ - y;
}
/*- End of function --------------------------------------------------------*/
705
#if defined(HAVE_LONG_DOUBLE)
/* Subtract the scalar y from the first n long doubles of x, into z. */
SPAN_DECLARE(void) vec_scalar_subl(long double z[], const long double x[], long double y, int n)
{
    const long double *s;
    long double *d;

    for (s = x, d = z; n > 0; n--)
        *d++ = *s++ - y;
}
/*- End of function --------------------------------------------------------*/
#endif
716
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Element-wise product, z[i] = x[i]*y[i], for the first n floats,
   4 at a time with SSE. */
SPAN_DECLARE(void) vec_mulf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n3 = _mm_mul_ps(n1, n2);
            _mm_storeu_ps(z + i, n3);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*y[n - 1];
    }
}
#else
/* Element-wise product, z[i] = x[i]*y[i], for the first n floats (scalar form). */
SPAN_DECLARE(void) vec_mulf(float z[], const float x[], const float y[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i]*y[i];
}
/*- End of function --------------------------------------------------------*/
#endif
756
/* Element-wise product, z[i] = x[i]*y[i], for the first n doubles. */
SPAN_DECLARE(void) vec_mul(double z[], const double x[], const double y[], int n)
{
    const double *p1;
    const double *p2;
    double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ * *p2++;
}
/*- End of function --------------------------------------------------------*/
765
#if defined(HAVE_LONG_DOUBLE)
/* Element-wise product, z[i] = x[i]*y[i], for the first n long doubles. */
SPAN_DECLARE(void) vec_mull(long double z[], const long double x[], const long double y[], int n)
{
    const long double *p1;
    const long double *p2;
    long double *q;

    for (p1 = x, p2 = y, q = z; n > 0; n--)
        *q++ = *p1++ * *p2++;
}
/*- End of function --------------------------------------------------------*/
#endif
776
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Return the dot product of the first n floats of x and y, accumulating
   4 partial sums at a time with SSE. Note the summation order differs from
   the scalar form, so rounding of the result may differ slightly. */
SPAN_DECLARE(float) vec_dot_prodf(const float x[], const float y[], int n)
{
    int i;
    float z;
    __m128 n1;
    __m128 n2;
    __m128 n3;
    __m128 n4;

    z = 0.0f;
    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n4 = _mm_setzero_ps();  /* Four running partial sums, all zero */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n3 = _mm_mul_ps(n1, n2);
            n4 = _mm_add_ps(n4, n3);
        }
        /* Horizontal add: fold the 4 partial sums down to a single scalar */
        n4 = _mm_add_ps(_mm_movehl_ps(n4, n4), n4);
        n4 = _mm_add_ss(_mm_shuffle_ps(n4, n4, 1), n4);
        _mm_store_ss(&z, n4);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z += x[n - 3]*y[n - 3];
        /* Fall through */
    case 2:
        z += x[n - 2]*y[n - 2];
        /* Fall through */
    case 1:
        z += x[n - 1]*y[n - 1];
    }
    return z;
}
#else
/* Return the dot product of the first n floats of x and y (scalar form). */
SPAN_DECLARE(float) vec_dot_prodf(const float x[], const float y[], int n)
{
    int i;
    float z;

    z = 0.0f;
    for (i = 0; i < n; i++)
        z += x[i]*y[i];
    return z;
}
/*- End of function --------------------------------------------------------*/
#endif
827
/* Return the dot product of the first n doubles of x and y. */
SPAN_DECLARE(double) vec_dot_prod(const double x[], const double y[], int n)
{
    const double *p1;
    const double *p2;
    double sum;

    /* Accumulate front to back, preserving the natural summation order */
    sum = 0.0;
    for (p1 = x, p2 = y; n > 0; n--)
        sum += *p1++ * *p2++;
    return sum;
}
/*- End of function --------------------------------------------------------*/
839
#if defined(HAVE_LONG_DOUBLE)
/* Return the dot product of the first n long doubles of x and y. */
SPAN_DECLARE(long double) vec_dot_prodl(const long double x[], const long double y[], int n)
{
    const long double *p1;
    const long double *p2;
    long double sum;

    sum = 0.0L;
    for (p1 = x, p2 = y; n > 0; n--)
        sum += *p1++ * *p2++;
    return sum;
}
/*- End of function --------------------------------------------------------*/
#endif
853
/* Return the dot product of y with the n-element circular buffer x,
   reading x starting from offset pos and wrapping at n. */
SPAN_DECLARE(float) vec_circular_dot_prodf(const float x[], const float y[], int n, int pos)
{
    float sum;

    /* The tail of x (from pos) against the head of y, then the head of x
       against the remainder of y */
    sum = vec_dot_prodf(x + pos, y, n - pos);
    sum += vec_dot_prodf(x, y + (n - pos), pos);
    return sum;
}
/*- End of function --------------------------------------------------------*/
863
864 #define LMS_LEAK_RATE 0.9999f
865
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Leaky LMS coefficient update over the first n taps:
   y[i] = y[i]*LMS_LEAK_RATE + x[i]*error, 4 taps at a time with SSE. */
SPAN_DECLARE(void) vec_lmsf(const float x[], float y[], int n, float error)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;
    __m128 n4;

    /* i = n rounded down to a multiple of 4; zero means no whole SSE chunks */
    if ((i = n & ~3))
    {
        n3 = _mm_set1_ps(error);            /* Broadcast the error term */
        n4 = _mm_set1_ps(LMS_LEAK_RATE);    /* Broadcast the leakage factor */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n1 = _mm_mul_ps(n1, n3);
            n2 = _mm_mul_ps(n2, n4);
            n1 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(y + i, n1);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        y[n - 3] = y[n - 3]*LMS_LEAK_RATE + x[n - 3]*error;
        /* Fall through */
    case 2:
        y[n - 2] = y[n - 2]*LMS_LEAK_RATE + x[n - 2]*error;
        /* Fall through */
    case 1:
        y[n - 1] = y[n - 1]*LMS_LEAK_RATE + x[n - 1]*error;
    }
}
#else
/* Leaky LMS coefficient update over the first n taps (scalar form). */
SPAN_DECLARE(void) vec_lmsf(const float x[], float y[], int n, float error)
{
    int i;

    for (i = 0; i < n; i++)
    {
        /* Leak a little to tame uncontrolled wandering */
        y[i] = y[i]*LMS_LEAK_RATE + x[i]*error;
    }
}
#endif
/*- End of function --------------------------------------------------------*/
913
/* Leaky LMS update of the coefficients y against the n-element circular
   buffer x, reading x starting from offset pos and wrapping at n. */
SPAN_DECLARE(void) vec_circular_lmsf(const float x[], float y[], int n, int pos, float error)
{
    /* Update the head of y from the tail of x, then the remainder of y
       from the head of x */
    vec_lmsf(x + pos, y, n - pos, error);
    vec_lmsf(x, y + (n - pos), pos, error);
}
/*- End of function --------------------------------------------------------*/
920 /*- End of file ------------------------------------------------------------*/

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.