audiostuff: spandsp-0.0.3/spandsp-0.0.3/src/spandsp/fir.h comparison

comparison spandsp-0.0.3/spandsp-0.0.3/src/spandsp/fir.h @ 5:f762bf195c4b

import spandsp-0.0.3

author	Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date	Fri, 25 Jun 2010 16:00:21 +0200
parents
children

comparison

equal deleted inserted replaced

-:26cd8f1ef0b1
+:f762bf195c4b
+/*
+* SpanDSP - a series of DSP components for telephony
+*
+* fir.h - General telephony FIR routines
+*
+* Written by Steve Underwood <steveu@coppice.org>
+*
+* Copyright (C) 2002 Steve Underwood
+*
+* All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2, as
+* published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; if not, write to the Free Software
+* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+* $Id: fir.h,v 1.8 2006/10/24 13:45:28 steveu Exp $
+*/
+/*! \page fir_page FIR filtering
+\section fir_page_sec_1 What does it do?
+???.
+\section fir_page_sec_2 How does it work?
+???.
+*/
+#if !defined(_FIR_H_)
+#define _FIR_H_
+/*
+Blackfin NOTES & IDEAS:
+A simple dot product function is used to implement the filter.  This performs
+just one MAC/cycle which is inefficient but was easy to implement as a first
+pass.  The current Blackfin code also uses an unrolled form of the filter
+history to avoid 0 length hardware loop issues.  This is wasteful of
+memory.
+Ideas for improvement:
+1/ Rewrite filter for dual MAC inner loop.  The issue here is handling
+history sample offsets that are 16 bit aligned - the dual MAC needs
+32 bit aligmnent.  There are some good examples in libbfdsp.
+2/ Use the hardware circular buffer facility tohalve memory usage.
+3/ Consider using internal memory.
+Using less memory might also improve speed as cache misses will be
+reduced. A drop in MIPs and memory approaching 50% should be
+possible.
+The foreground and background filters currenlty use a total of
+about 10 MIPs/ch as measured with speedtest.c on a 256 TAP echo
+can.
+*/
+#if defined(USE_MMX)  ||  defined(USE_SSE2)
+#include "mmx.h"
+#endif
+/*!
+16 bit integer FIR descriptor. This defines the working state for a single
+instance of an FIR filter using 16 bit integer coefficients.
+*/
+typedef struct
+{
+int taps;
+int curr_pos;
+const int16_t *coeffs;
+int16_t *history;
+} fir16_state_t;
+/*!
+32 bit integer FIR descriptor. This defines the working state for a single
+instance of an FIR filter using 32 bit integer coefficients, and filtering
+16 bit integer data.
+*/
+typedef struct
+{
+int taps;
+int curr_pos;
+const int32_t *coeffs;
+int16_t *history;
+} fir32_state_t;
+/*!
+Floating point FIR descriptor. This defines the working state for a single
+instance of an FIR filter using floating point coefficients and data.
+*/
+typedef struct
+{
+int taps;
+int curr_pos;
+const float *coeffs;
+float *history;
+} fir_float_state_t;
+#ifdef __cplusplus
+extern "C" {
+#endif
+static __inline__ const int16_t *fir16_create(fir16_state_t *fir,
+const int16_t *coeffs,
+int taps)
+{
+fir->taps = taps;
+fir->curr_pos = taps - 1;
+fir->coeffs = coeffs;
+#if defined(USE_MMX)  ||  defined(USE_SSE2) || defined(__BLACKFIN_ASM__)
+if ((fir->history = malloc(2*taps*sizeof(int16_t))))
+memset(fir->history, 0, 2*taps*sizeof(int16_t));
+#else
+if ((fir->history = (int16_t *) malloc(taps*sizeof(int16_t))))
+memset(fir->history, 0, taps*sizeof(int16_t));
+#endif
+return fir->history;
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ void fir16_flush(fir16_state_t *fir)
+{
+#if defined(USE_MMX)  ||  defined(USE_SSE2) || defined(__BLACKFIN_ASM__)
+memset(fir->history, 0, 2*fir->taps*sizeof(int16_t));
+#else
+memset(fir->history, 0, fir->taps*sizeof(int16_t));
+#endif
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ void fir16_free(fir16_state_t *fir)
+{
+free(fir->history);
+}
+/*- End of function --------------------------------------------------------*/
+#ifdef __BLACKFIN_ASM__
+static inline int32_t dot_asm(short *x, short *y, int len)
+{
+int dot;
+len--;
+__asm__
+(
+"I0 = %1;\n\t"
+"I1 = %2;\n\t"
+"A0 = 0;\n\t"
+"R0.L = W[I0++] || R1.L = W[I1++];\n\t"
+"LOOP dot%= LC0 = %3;\n\t"
+"LOOP_BEGIN dot%=;\n\t"
+"A0 += R0.L * R1.L (IS) || R0.L = W[I0++] || R1.L = W[I1++];\n\t"
+"LOOP_END dot%=;\n\t"
+"A0 += R0.L*R1.L (IS);\n\t"
+"R0 = A0;\n\t"
+"%0 = R0;\n\t"
+: "=&d" (dot)
+: "a" (x), "a" (y), "a" (len)
+: "I0", "I1", "A1", "A0", "R0", "R1"
+);
+return dot;
+}
+#endif
+/*- End of function --------------------------------------------------------*/
+static __inline__ int16_t fir16(fir16_state_t *fir, int16_t sample)
+{
+int32_t y;
+#if defined(USE_MMX)
+int i;
+mmx_t *mmx_coeffs;
+mmx_t *mmx_hist;
+fir->history[fir->curr_pos] = sample;
+fir->history[fir->curr_pos + fir->taps] = sample;
+mmx_coeffs = (mmx_t *) fir->coeffs;
+mmx_hist = (mmx_t *) &fir->history[fir->curr_pos];
+i = fir->taps;
+pxor_r2r(mm4, mm4);
+/* 8 samples per iteration, so the filter must be a multiple of 8 long. */
+while (i > 0)
+{
+movq_m2r(mmx_coeffs[0], mm0);
+movq_m2r(mmx_coeffs[1], mm2);
+movq_m2r(mmx_hist[0], mm1);
+movq_m2r(mmx_hist[1], mm3);
+mmx_coeffs += 2;
+mmx_hist += 2;
+pmaddwd_r2r(mm1, mm0);
+pmaddwd_r2r(mm3, mm2);
+paddd_r2r(mm0, mm4);
+paddd_r2r(mm2, mm4);
+i -= 8;
+}
+movq_r2r(mm4, mm0);
+psrlq_i2r(32, mm0);
+paddd_r2r(mm0, mm4);
+movd_r2m(mm4, y);
+emms();
+#elif defined(USE_SSE2)
+int i;
+xmm_t *xmm_coeffs;
+xmm_t *xmm_hist;
+fir->history[fir->curr_pos] = sample;
+fir->history[fir->curr_pos + fir->taps] = sample;
+xmm_coeffs = (xmm_t *) fir->coeffs;
+xmm_hist = (xmm_t *) &fir->history[fir->curr_pos];
+i = fir->taps;
+pxor_r2r(xmm4, xmm4);
+/* 16 samples per iteration, so the filter must be a multiple of 16 long. */
+while (i > 0)
+{
+movdqu_m2r(xmm_coeffs[0], xmm0);
+movdqu_m2r(xmm_coeffs[1], xmm2);
+movdqu_m2r(xmm_hist[0], xmm1);
+movdqu_m2r(xmm_hist[1], xmm3);
+xmm_coeffs += 2;
+xmm_hist += 2;
+pmaddwd_r2r(xmm1, xmm0);
+pmaddwd_r2r(xmm3, xmm2);
+paddd_r2r(xmm0, xmm4);
+paddd_r2r(xmm2, xmm4);
+i -= 16;
+}
+movdqa_r2r(xmm4, xmm0);
+psrldq_i2r(8, xmm0);
+paddd_r2r(xmm0, xmm4);
+movdqa_r2r(xmm4, xmm0);
+psrldq_i2r(4, xmm0);
+paddd_r2r(xmm0, xmm4);
+movd_r2m(xmm4, y);
+#elif defined(__BLACKFIN_ASM__)
+fir->history[fir->curr_pos] = sample;
+fir->history[fir->curr_pos + fir->taps] = sample;
+y = dot_asm((int16_t*)fir->coeffs, &fir->history[fir->curr_pos], fir->taps);
+#else
+int i;
+int offset1;
+int offset2;
+fir->history[fir->curr_pos] = sample;
+offset2 = fir->curr_pos;
+offset1 = fir->taps - offset2;
+y = 0;
+for (i = fir->taps - 1;  i >= offset1;  i--)
+y += fir->coeffs[i]*fir->history[i - offset1];
+for (  ;  i >= 0;  i--)
+y += fir->coeffs[i]*fir->history[i + offset2];
+#endif
+if (fir->curr_pos <= 0)
+	fir->curr_pos = fir->taps;
+fir->curr_pos--;
+return (int16_t) (y >> 15);
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ const int16_t *fir32_create(fir32_state_t *fir,
+const int32_t *coeffs,
+int taps)
+{
+fir->taps = taps;
+fir->curr_pos = taps - 1;
+fir->coeffs = coeffs;
+fir->history = (int16_t *) malloc(taps*sizeof(int16_t));
+if (fir->history)
+	memset(fir->history, '\0', taps*sizeof(int16_t));
+return fir->history;
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ void fir32_flush(fir32_state_t *fir)
+{
+memset(fir->history, 0, fir->taps*sizeof(int16_t));
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ void fir32_free(fir32_state_t *fir)
+{
+free(fir->history);
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ int16_t fir32(fir32_state_t *fir, int16_t sample)
+{
+int i;
+int32_t y;
+int offset1;
+int offset2;
+fir->history[fir->curr_pos] = sample;
+offset2 = fir->curr_pos;
+offset1 = fir->taps - offset2;
+y = 0;
+for (i = fir->taps - 1;  i >= offset1;  i--)
+y += fir->coeffs[i]*fir->history[i - offset1];
+for (  ;  i >= 0;  i--)
+y += fir->coeffs[i]*fir->history[i + offset2];
+if (fir->curr_pos <= 0)
+	fir->curr_pos = fir->taps;
+fir->curr_pos--;
+return (int16_t) (y >> 15);
+}
+/*- End of function --------------------------------------------------------*/
+#ifndef __KERNEL__
+static __inline__ const float *fir_float_create(fir_float_state_t *fir,
+const float *coeffs,
+	    	    	                        int taps)
+{
+fir->taps = taps;
+fir->curr_pos = taps - 1;
+fir->coeffs = coeffs;
+fir->history = (float *) malloc(taps*sizeof(float));
+if (fir->history)
+memset(fir->history, '\0', taps*sizeof(float));
+return fir->history;
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ void fir_float_free(fir_float_state_t *fir)
+{
+free(fir->history);
+}
+/*- End of function --------------------------------------------------------*/
+static __inline__ int16_t fir_float(fir_float_state_t *fir, int16_t sample)
+{
+int i;
+float y;
+int offset1;
+int offset2;
+fir->history[fir->curr_pos] = sample;
+offset2 = fir->curr_pos;
+offset1 = fir->taps - offset2;
+y = 0;
+for (i = fir->taps - 1;  i >= offset1;  i--)
+y += fir->coeffs[i]*fir->history[i - offset1];
+for (  ;  i >= 0;  i--)
+y += fir->coeffs[i]*fir->history[i + offset2];
+if (fir->curr_pos <= 0)
+	fir->curr_pos = fir->taps;
+fir->curr_pos--;
+return  (int16_t) y;
+}
+/*- End of function --------------------------------------------------------*/
+#endif
+#ifdef __cplusplus
+}
+#endif
+#endif
+/*- End of file ------------------------------------------------------------*/

Mercurial > hg > audiostuff

comparison spandsp-0.0.3/spandsp-0.0.3/src/spandsp/fir.h @ 5:f762bf195c4b