Mercurial > hg > audiostuff
annotate spandsp-0.0.6pre17/src/vector_float.c @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
| author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> | 
|---|---|
| date | Fri, 25 Jun 2010 15:50:58 +0200 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 4 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 1 /* | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 2 * SpanDSP - a series of DSP components for telephony | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 3 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 4 * vector_float.c - Floating vector arithmetic routines. | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 5 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 6 * Written by Steve Underwood <steveu@coppice.org> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 7 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 8 * Copyright (C) 2006 Steve Underwood | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 9 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 10 * All rights reserved. | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 11 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 12 * This program is free software; you can redistribute it and/or modify | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 13 * it under the terms of the GNU Lesser General Public License version 2.1, | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 14 * as published by the Free Software Foundation. | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 15 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 16 * This program is distributed in the hope that it will be useful, | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 19 * GNU Lesser General Public License for more details. | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 20 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 21 * You should have received a copy of the GNU Lesser General Public | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 22 * License along with this program; if not, write to the Free Software | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 24 * | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 25 * $Id: vector_float.c,v 1.22 2009/07/12 09:23:09 steveu Exp $ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 26 */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 27 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 28 /*! \file */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 29 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 30 #if defined(HAVE_CONFIG_H) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 31 #include "config.h" | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 32 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 33 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 34 #include <inttypes.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 35 #include <stdlib.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 36 #include <stdio.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 37 #include <string.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 38 #if defined(HAVE_TGMATH_H) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 39 #include <tgmath.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 40 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 41 #if defined(HAVE_MATH_H) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 42 #include <math.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 43 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 44 #include <assert.h> | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 45 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 46 #include "floating_fudge.h" | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 47 #include "mmx_sse_decs.h" | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 48 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 49 #include "spandsp/telephony.h" | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 50 #include "spandsp/vector_float.h" | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 51 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 52 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 53 SPAN_DECLARE(void) vec_copyf(float z[], const float x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 54 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 55 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 56 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 57 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 58 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 59 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 60 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 61 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 62 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 63 _mm_storeu_ps(z + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 64 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 65 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 66 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 67 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 68 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 69 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 70 z[n - 3] = x[n - 3]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 71 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 72 z[n - 2] = x[n - 2]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 73 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 74 z[n - 1] = x[n - 1]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 75 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 76 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 77 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 78 SPAN_DECLARE(void) vec_copyf(float z[], const float x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 79 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 80 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 81 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 82 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 83 z[i] = x[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 84 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 85 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 86 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 87 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 88 SPAN_DECLARE(void) vec_copy(double z[], const double x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 89 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 90 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 91 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 92 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 93 z[i] = x[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 94 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 95 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 96 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 97 #if defined(HAVE_LONG_DOUBLE) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 98 SPAN_DECLARE(void) vec_copyl(long double z[], const long double x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 99 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 100 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 101 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 102 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 103 z[i] = x[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 104 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 105 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 106 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 107 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 108 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 109 SPAN_DECLARE(void) vec_negatef(float z[], const float x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 110 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 111 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 112 static const uint32_t mask = 0x80000000; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 113 static const float *fmask = (float *) &mask; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 114 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 115 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 116 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 117 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 118 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 119 n2 = _mm_set1_ps(*fmask); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 120 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 121 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 122 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 123 n1 = _mm_xor_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 124 _mm_storeu_ps(z + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 125 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 126 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 127 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 128 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 129 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 130 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 131 z[n - 3] = -x[n - 3]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 132 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 133 z[n - 2] = -x[n - 2]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 134 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 135 z[n - 1] = -x[n - 1]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 136 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 137 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 138 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 139 SPAN_DECLARE(void) vec_negatef(float z[], const float x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 140 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 141 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 142 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 143 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 144 z[i] = -x[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 145 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 146 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 147 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 148 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 149 SPAN_DECLARE(void) vec_negate(double z[], const double x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 150 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 151 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 152 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 153 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 154 z[i] = -x[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 155 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 156 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 157 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 158 #if defined(HAVE_LONG_DOUBLE) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 159 SPAN_DECLARE(void) vec_negatel(long double z[], const long double x[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 160 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 161 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 162 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 163 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 164 z[i] = -x[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 165 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 166 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 167 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 168 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 169 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 170 SPAN_DECLARE(void) vec_zerof(float z[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 171 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 172 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 173 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 174 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 175 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 176 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 177 n1 = _mm_setzero_ps(); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 178 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 179 _mm_storeu_ps(z + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 180 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 181 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 182 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 183 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 184 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 185 z[n - 3] = 0; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 186 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 187 z[n - 2] = 0; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 188 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 189 z[n - 1] = 0; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 190 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 191 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 192 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 193 SPAN_DECLARE(void) vec_zerof(float z[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 194 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 195 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 196 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 197 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 198 z[i] = 0.0f; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 199 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 200 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 201 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 202 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 203 SPAN_DECLARE(void) vec_zero(double z[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 204 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 205 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 206 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 207 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 208 z[i] = 0.0; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 209 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 210 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 211 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 212 #if defined(HAVE_LONG_DOUBLE) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 213 SPAN_DECLARE(void) vec_zerol(long double z[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 214 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 215 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 216 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 217 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 218 z[i] = 0.0L; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 219 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 220 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 221 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 222 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Set the first n elements of z[] to the value x (SSE variant).
   Unaligned stores are used, so z[] does not need any special alignment.
   A zero or negative n leaves z[] untouched, as in the plain C variant. */
SPAN_DECLARE(void) vec_setf(float z[], float x, int n)
{
    int i;
    __m128 n1;

    /* For a negative n, (n & 3) can still be 1 to 3, which would make the
       tail handling below write to elements in front of the vector. */
    if (n <= 0)
        return;
    if ((i = n & ~3))
    {
        n1 = _mm_set1_ps(x);
        /* Work down through the complete groups of 4 elements */
        for (i -= 4; i >= 0; i -= 4)
            _mm_storeu_ps(z + i, n1);
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x;
        /* Fall through */
    case 2:
        z[n - 2] = x;
        /* Fall through */
    case 1:
        z[n - 1] = x;
    }
}
#else
/* Set the first n elements of z[] to the value x.
   A zero or negative n leaves z[] untouched. */
SPAN_DECLARE(void) vec_setf(float z[], float x, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x;
}
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 255 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 256 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
/* Set the first n elements of the double vector z[] to the value x.
   Nothing is written when n is zero or negative. */
SPAN_DECLARE(void) vec_set(double z[], double x, int n)
{
    double *dest;
    int remaining;

    dest = z;
    /* Walk forwards through the vector, one element per pass */
    for (remaining = n; remaining > 0; remaining--)
        *dest++ = x;
}
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 264 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 265 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Set the first n elements of the long double vector z[] to the value x.
   Nothing is written when n is zero or negative. */
SPAN_DECLARE(void) vec_setl(long double z[], long double x, int n)
{
    int pos;

    pos = 0;
    while (pos < n)
    {
        z[pos] = x;
        pos++;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 276 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Element-wise sum of two float vectors: z[i] = x[i] + y[i] for the first
   n elements (SSE variant). Unaligned loads and stores are used, so none of
   the vectors needs any special alignment.
   A zero or negative n leaves z[] untouched, as in the plain C variant. */
SPAN_DECLARE(void) vec_addf(float z[], const float x[], const float y[], int n)
{
    int i;
    __m128 n1;
    __m128 n2;

    /* For a negative n, (n & 3) can still be 1 to 3, which would make the
       tail handling below access elements in front of the vectors. */
    if (n <= 0)
        return;
    if ((i = n & ~3))
    {
        /* Work down through the complete groups of 4 elements */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n2 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y[n - 3];
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y[n - 2];
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y[n - 1];
    }
}
#else
/* Element-wise sum of two float vectors: z[i] = x[i] + y[i] for the first
   n elements. A zero or negative n leaves z[] untouched. */
SPAN_DECLARE(void) vec_addf(float z[], const float x[], const float y[], int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] + y[i];
}
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 314 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 315 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
/* Element-wise sum of two double vectors: z[k] = x[k] + y[k] for the first
   n elements, taken in ascending order. Nothing is written when n is zero
   or negative. */
SPAN_DECLARE(void) vec_add(double z[], const double x[], const double y[], int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k] + y[k];
        k++;
    }
}
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 323 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 324 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Element-wise sum of two long double vectors: z[i] = x[i] + y[i] for the
   first n elements, taken in ascending order. Nothing is written when n is
   zero or negative. */
SPAN_DECLARE(void) vec_addl(long double z[], const long double x[], const long double y[], int n)
{
    const long double *left;
    const long double *right;
    long double *out;
    int remaining;

    left = x;
    right = y;
    out = z;
    for (remaining = n; remaining > 0; remaining--)
        *out++ = *left++ + *right++;
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 335 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Scaled sum of two float vectors: z[i] = x[i]*x_scale + y[i]*y_scale for
   the first n elements (SSE variant). Unaligned loads and stores are used,
   so none of the vectors needs any special alignment.
   A zero or negative n leaves z[] untouched, as in the plain C variant. */
SPAN_DECLARE(void) vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;
    __m128 n4;

    /* For a negative n, (n & 3) can still be 1 to 3, which would make the
       tail handling below access elements in front of the vectors. */
    if (n <= 0)
        return;
    if ((i = n & ~3))
    {
        /* Replicate each scale factor across all 4 lanes */
        n3 = _mm_set1_ps(x_scale);
        n4 = _mm_set1_ps(y_scale);
        /* Work down through the complete groups of 4 elements */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n1 = _mm_mul_ps(n1, n3);
            n2 = _mm_mul_ps(n2, n4);
            n2 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3]*x_scale + y[n - 3]*y_scale;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2]*x_scale + y[n - 2]*y_scale;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1]*x_scale + y[n - 1]*y_scale;
    }
}
#else
/* Scaled sum of two float vectors: z[i] = x[i]*x_scale + y[i]*y_scale for
   the first n elements. A zero or negative n leaves z[] untouched. */
SPAN_DECLARE(void) vec_scaledxy_addf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i]*x_scale + y[i]*y_scale;
}
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 379 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 380 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
/* Scaled sum of two double vectors: z[k] = x[k]*x_scale + y[k]*y_scale for
   the first n elements, taken in ascending order. Nothing is written when
   n is zero or negative. */
SPAN_DECLARE(void) vec_scaledxy_add(double z[], const double x[], double x_scale, const double y[], double y_scale, int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k]*x_scale + y[k]*y_scale;
        k++;
    }
}
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 388 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 389 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Scaled sum of two long double vectors: z[i] = x[i]*x_scale + y[i]*y_scale
   for the first n elements, taken in ascending order. Nothing is written
   when n is zero or negative. */
SPAN_DECLARE(void) vec_scaledxy_addl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
{
    const long double *left;
    const long double *right;
    long double *out;
    int remaining;

    left = x;
    right = y;
    out = z;
    for (remaining = n; remaining > 0; remaining--)
        *out++ = *left++*x_scale + *right++*y_scale;
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 400 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(__GNUC__) && defined(SPANDSP_USE_SSE2)
/* Sum of a float vector and a scaled float vector:
   z[i] = x[i] + y[i]*y_scale for the first n elements (SSE variant).
   Unaligned loads and stores are used, so none of the vectors needs any
   special alignment.
   A zero or negative n leaves z[] untouched, as in the plain C variant. */
SPAN_DECLARE(void) vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n)
{
    int i;
    __m128 n1;
    __m128 n2;
    __m128 n3;

    /* For a negative n, (n & 3) can still be 1 to 3, which would make the
       tail handling below access elements in front of the vectors. */
    if (n <= 0)
        return;
    if ((i = n & ~3))
    {
        /* Replicate the scale factor across all 4 lanes */
        n3 = _mm_set1_ps(y_scale);
        /* Work down through the complete groups of 4 elements */
        for (i -= 4; i >= 0; i -= 4)
        {
            n1 = _mm_loadu_ps(x + i);
            n2 = _mm_loadu_ps(y + i);
            n2 = _mm_mul_ps(n2, n3);
            n2 = _mm_add_ps(n1, n2);
            _mm_storeu_ps(z + i, n2);
        }
    }
    /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */
    switch (n & 3)
    {
    case 3:
        z[n - 3] = x[n - 3] + y[n - 3]*y_scale;
        /* Fall through */
    case 2:
        z[n - 2] = x[n - 2] + y[n - 2]*y_scale;
        /* Fall through */
    case 1:
        z[n - 1] = x[n - 1] + y[n - 1]*y_scale;
    }
}
#else
/* Sum of a float vector and a scaled float vector:
   z[i] = x[i] + y[i]*y_scale for the first n elements.
   A zero or negative n leaves z[] untouched. */
SPAN_DECLARE(void) vec_scaledy_addf(float z[], const float x[], const float y[], float y_scale, int n)
{
    int i;

    for (i = 0; i < n; i++)
        z[i] = x[i] + y[i]*y_scale;
}
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 441 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 442 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
/* Sum of a double vector and a scaled double vector:
   z[k] = x[k] + y[k]*y_scale for the first n elements, taken in ascending
   order. Nothing is written when n is zero or negative. */
SPAN_DECLARE(void) vec_scaledy_add(double z[], const double x[], const double y[], double y_scale, int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k] + y[k]*y_scale;
        k++;
    }
}
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 450 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 451 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Sum of a long double vector and a scaled long double vector:
   z[i] = x[i] + y[i]*y_scale for the first n elements, taken in ascending
   order. Nothing is written when n is zero or negative. */
SPAN_DECLARE(void) vec_scaledy_addl(long double z[], const long double x[], const long double y[], long double y_scale, int n)
{
    const long double *left;
    const long double *right;
    long double *out;
    int remaining;

    left = x;
    right = y;
    out = z;
    for (remaining = n; remaining > 0; remaining--)
        *out++ = *left++ + *right++*y_scale;
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 462 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 463 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 464 SPAN_DECLARE(void) vec_subf(float z[], const float x[], const float y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 465 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 466 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 467 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 468 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 469 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 470 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 471 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 472 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 473 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 474 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 475 n2 = _mm_loadu_ps(y + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 476 n2 = _mm_sub_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 477 _mm_storeu_ps(z + i, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 478 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 479 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 480 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 481 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 482 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 483 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 484 z[n - 3] = x[n - 3] - y[n - 3]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 485 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 486 z[n - 2] = x[n - 2] - y[n - 2]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 487 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 488 z[n - 1] = x[n - 1] - y[n - 1]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 489 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 490 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 491 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 492 SPAN_DECLARE(void) vec_subf(float z[], const float x[], const float y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 493 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 494 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 495 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 496 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 497 z[i] = x[i] - y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 498 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 499 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 500 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 501 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 502 SPAN_DECLARE(void) vec_sub(double z[], const double x[], const double y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 503 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 504 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 505 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 506 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 507 z[i] = x[i] - y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 508 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 509 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 510 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Element-wise long double subtraction: z[k] = x[k] - y[k] for 0 <= k < n,
   processed in ascending index order. */
SPAN_DECLARE(void) vec_subl(long double z[], const long double x[], const long double y[], int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k] - y[k];
        k++;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 521 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 522 SPAN_DECLARE(void) vec_scaledxy_subf(float z[], const float x[], float x_scale, const float y[], float y_scale, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 523 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 524 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 525 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 526 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 527 z[i] = x[i]*x_scale - y[i]*y_scale; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 528 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 529 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 530 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 531 SPAN_DECLARE(void) vec_scaledxy_sub(double z[], const double x[], double x_scale, const double y[], double y_scale, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 532 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 533 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 534 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 535 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 536 z[i] = x[i]*x_scale - y[i]*y_scale; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 537 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 538 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 539 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Scaled long double subtraction:
   z[k] = x[k]*x_scale - y[k]*y_scale for 0 <= k < n, in ascending index order. */
SPAN_DECLARE(void) vec_scaledxy_subl(long double z[], const long double x[], long double x_scale, const long double y[], long double y_scale, int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k]*x_scale - y[k]*y_scale;
        k++;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 550 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 551 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 552 SPAN_DECLARE(void) vec_scalar_mulf(float z[], const float x[], float y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 553 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 554 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 555 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 556 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 557 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 558 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 559 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 560 n2 = _mm_set1_ps(y); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 561 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 562 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 563 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 564 n1 = _mm_mul_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 565 _mm_storeu_ps(z + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 566 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 567 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 568 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 569 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 570 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 571 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 572 z[n - 3] = x[n - 3]*y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 573 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 574 z[n - 2] = x[n - 2]*y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 575 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 576 z[n - 1] = x[n - 1]*y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 577 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 578 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 579 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 580 SPAN_DECLARE(void) vec_scalar_mulf(float z[], const float x[], float y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 581 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 582 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 583 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 584 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 585 z[i] = x[i]*y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 586 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 587 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 588 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 589 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 590 SPAN_DECLARE(void) vec_scalar_mul(double z[], const double x[], double y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 591 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 592 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 593 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 594 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 595 z[i] = x[i]*y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 596 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 597 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 598 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 599 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 600 SPAN_DECLARE(void) vec_scalar_addf(float z[], const float x[], float y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 601 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 602 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 603 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 604 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 605 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 606 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 607 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 608 n2 = _mm_set1_ps(y); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 609 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 610 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 611 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 612 n1 = _mm_add_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 613 _mm_storeu_ps(z + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 614 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 615 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 616 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 617 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 618 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 619 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 620 z[n - 3] = x[n - 3] + y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 621 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 622 z[n - 2] = x[n - 2] + y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 623 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 624 z[n - 1] = x[n - 1] + y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 625 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 626 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 627 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 628 SPAN_DECLARE(void) vec_scalar_addf(float z[], const float x[], float y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 629 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 630 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 631 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 632 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 633 z[i] = x[i] + y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 634 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 635 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 636 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 637 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 638 SPAN_DECLARE(void) vec_scalar_add(double z[], const double x[], double y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 639 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 640 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 641 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 642 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 643 z[i] = x[i] + y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 644 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 645 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 646 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
#if defined(HAVE_LONG_DOUBLE)
/* Add a scalar to a long double vector: z[k] = x[k] + y for 0 <= k < n,
   processed in ascending index order. */
SPAN_DECLARE(void) vec_scalar_addl(long double z[], const long double x[], long double y, int n)
{
    int k;

    k = 0;
    while (k < n)
    {
        z[k] = x[k] + y;
        k++;
    }
}
/*- End of function --------------------------------------------------------*/
#endif
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 657 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 658 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 659 SPAN_DECLARE(void) vec_scalar_subf(float z[], const float x[], float y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 660 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 661 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 662 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 663 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 664 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 665 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 666 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 667 n2 = _mm_set1_ps(y); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 668 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 669 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 670 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 671 n1 = _mm_sub_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 672 _mm_storeu_ps(z + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 673 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 674 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 675 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 676 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 677 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 678 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 679 z[n - 3] = x[n - 3] - y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 680 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 681 z[n - 2] = x[n - 2] - y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 682 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 683 z[n - 1] = x[n - 1] - y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 684 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 685 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 686 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 687 SPAN_DECLARE(void) vec_scalar_subf(float z[], const float x[], float y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 688 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 689 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 690 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 691 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 692 z[i] = x[i] - y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 693 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 694 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 695 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 696 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 697 SPAN_DECLARE(void) vec_scalar_sub(double z[], const double x[], double y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 698 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 699 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 700 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 701 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 702 z[i] = x[i] - y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 703 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 704 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 705 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 706 #if defined(HAVE_LONG_DOUBLE) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 707 SPAN_DECLARE(void) vec_scalar_subl(long double z[], const long double x[], long double y, int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 708 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 709 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 710 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 711 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 712 z[i] = x[i] - y; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 713 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 714 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 715 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 716 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 717 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 718 SPAN_DECLARE(void) vec_mulf(float z[], const float x[], const float y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 719 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 720 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 721 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 722 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 723 __m128 n3; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 724 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 725 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 726 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 727 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 728 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 729 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 730 n2 = _mm_loadu_ps(y + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 731 n3 = _mm_mul_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 732 _mm_storeu_ps(z + i, n3); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 733 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 734 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 735 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 736 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 737 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 738 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 739 z[n - 3] = x[n - 3]*y[n - 3]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 740 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 741 z[n - 2] = x[n - 2]*y[n - 2]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 742 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 743 z[n - 1] = x[n - 1]*y[n - 1]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 744 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 745 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 746 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 747 SPAN_DECLARE(void) vec_mulf(float z[], const float x[], const float y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 748 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 749 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 750 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 751 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 752 z[i] = x[i]*y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 753 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 754 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 755 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 756 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 757 SPAN_DECLARE(void) vec_mul(double z[], const double x[], const double y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 758 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 759 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 760 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 761 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 762 z[i] = x[i]*y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 763 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 764 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 765 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 766 #if defined(HAVE_LONG_DOUBLE) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 767 SPAN_DECLARE(void) vec_mull(long double z[], const long double x[], const long double y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 768 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 769 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 770 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 771 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 772 z[i] = x[i]*y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 773 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 774 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 775 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 776 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 777 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 778 SPAN_DECLARE(float) vec_dot_prodf(const float x[], const float y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 779 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 780 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 781 float z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 782 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 783 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 784 __m128 n3; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 785 __m128 n4; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 786 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 787 z = 0.0f; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 788 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 789 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 790 n4 = _mm_setzero_ps(); //sets sum to zero | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 791 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 792 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 793 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 794 n2 = _mm_loadu_ps(y + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 795 n3 = _mm_mul_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 796 n4 = _mm_add_ps(n4, n3); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 797 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 798 n4 = _mm_add_ps(_mm_movehl_ps(n4, n4), n4); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 799 n4 = _mm_add_ss(_mm_shuffle_ps(n4, n4, 1), n4); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 800 _mm_store_ss(&z, n4); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 801 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 802 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 803 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 804 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 805 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 806 z += x[n - 3]*y[n - 3]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 807 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 808 z += x[n - 2]*y[n - 2]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 809 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 810 z += x[n - 1]*y[n - 1]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 811 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 812 return z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 813 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 814 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 815 SPAN_DECLARE(float) vec_dot_prodf(const float x[], const float y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 816 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 817 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 818 float z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 819 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 820 z = 0.0f; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 821 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 822 z += x[i]*y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 823 return z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 824 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 825 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 826 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 827 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 828 SPAN_DECLARE(double) vec_dot_prod(const double x[], const double y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 829 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 830 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 831 double z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 832 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 833 z = 0.0; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 834 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 835 z += x[i]*y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 836 return z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 837 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 838 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 839 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 840 #if defined(HAVE_LONG_DOUBLE) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 841 SPAN_DECLARE(long double) vec_dot_prodl(const long double x[], const long double y[], int n) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 842 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 843 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 844 long double z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 845 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 846 z = 0.0L; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 847 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 848 z += x[i]*y[i]; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 849 return z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 850 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 851 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 852 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 853 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 854 SPAN_DECLARE(float) vec_circular_dot_prodf(const float x[], const float y[], int n, int pos) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 855 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 856 float z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 857 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 858 z = vec_dot_prodf(&x[pos], &y[0], n - pos); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 859 z += vec_dot_prodf(&x[0], &y[n - pos], pos); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 860 return z; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 861 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 862 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 863 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
/* Factor each coefficient is multiplied by on every vec_lmsf() update, so the
   coefficients leak a little rather than wandering uncontrolled. */
#define LMS_LEAK_RATE 0.9999f
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 865 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 866 #if defined(__GNUC__) && defined(SPANDSP_USE_SSE2) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 867 SPAN_DECLARE(void) vec_lmsf(const float x[], float y[], int n, float error) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 868 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 869 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 870 __m128 n1; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 871 __m128 n2; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 872 __m128 n3; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 873 __m128 n4; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 874 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 875 if ((i = n & ~3)) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 876 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 877 n3 = _mm_set1_ps(error); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 878 n4 = _mm_set1_ps(LMS_LEAK_RATE); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 879 for (i -= 4; i >= 0; i -= 4) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 880 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 881 n1 = _mm_loadu_ps(x + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 882 n2 = _mm_loadu_ps(y + i); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 883 n1 = _mm_mul_ps(n1, n3); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 884 n2 = _mm_mul_ps(n2, n4); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 885 n1 = _mm_add_ps(n1, n2); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 886 _mm_storeu_ps(y + i, n1); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 887 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 888 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 889 /* Now deal with the last 1 to 3 elements, which don't fill an SSE2 register */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 890 switch (n & 3) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 891 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 892 case 3: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 893 y[n - 3] = y[n - 3]*LMS_LEAK_RATE + x[n - 3]*error; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 894 case 2: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 895 y[n - 2] = y[n - 2]*LMS_LEAK_RATE + x[n - 2]*error; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 896 case 1: | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 897 y[n - 1] = y[n - 1]*LMS_LEAK_RATE + x[n - 1]*error; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 898 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 899 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 900 #else | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 901 SPAN_DECLARE(void) vec_lmsf(const float x[], float y[], int n, float error) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 902 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 903 int i; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 904 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 905 for (i = 0; i < n; i++) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 906 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 907 /* Leak a little to tame uncontrolled wandering */ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 908 y[i] = y[i]*LMS_LEAK_RATE + x[i]*error; | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 909 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 910 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 911 #endif | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 912 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 913 | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 914 SPAN_DECLARE(void) vec_circular_lmsf(const float x[], float y[], int n, int pos, float error) | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 915 { | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 916 vec_lmsf(&x[pos], &y[0], n - pos, error); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 917 vec_lmsf(&x[0], &y[n - pos], pos, error); | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 918 } | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 919 /*- End of function --------------------------------------------------------*/ | 
| 
26cd8f1ef0b1
import spandsp-0.0.6pre17
 Peter Meerwald <pmeerw@cosy.sbg.ac.at> parents: diff
changeset | 920 /*- End of file ------------------------------------------------------------*/ | 
