Mercurial > hg > audiostuff

diff spandsp-0.0.3/spandsp-0.0.3/src/lpc10_analyse.c @ 5:f762bf195c4b
import spandsp-0.0.3
author: Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date: Fri, 25 Jun 2010 16:00:21 +0200
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spandsp-0.0.3/spandsp-0.0.3/src/lpc10_analyse.c	Fri Jun 25 16:00:21 2010 +0200
@@ -0,0 +1,712 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * lpc10_analyse.c - LPC10 low bit rate speech codec.
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *
+ * Copyright (C) 2006 Steve Underwood
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * This code is based on the U.S. Department of Defense reference
+ * implementation of the LPC-10 2400 bps Voice Coder. They do not
+ * exert copyright claims on their code, and it may be freely used.
+ *
+ * $Id: lpc10_analyse.c,v 1.12 2006/11/30 15:41:47 steveu Exp $
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <memory.h>
+#if defined(HAVE_TGMATH_H)
+#include <tgmath.h>
+#endif
+#if defined(HAVE_MATH_H)
+#include <math.h>
+#endif
+
+#include "spandsp/telephony.h"
+#include "spandsp/dc_restore.h"
+#include "spandsp/lpc10.h"
+
+#include "lpc10_encdecs.h"
+
+static __inline__ float energyf(float amp[], int len)
+{
+    int i;
+    float rms;
+
+    rms = 0.0f;
+    for (i = 0;  i < len;  i++)
+	    rms += amp[i]*amp[i];
+    rms = sqrtf(rms/len);
+    return rms;
+}
+/*- End of function --------------------------------------------------------*/
+
+static void remove_dc_bias(float speech[], int len, float sigout[])
+{
+    float bias;
+    int i;
+
+    bias = 0.0f;
+    for (i = 0;  i < len;  i++)
+        bias += speech[i];
+    bias /= len;
+    for (i = 0; i < len;  i++)
+        sigout[i] = speech[i] - bias;
+}
+/*- End of function --------------------------------------------------------*/
+
+static void eval_amdf(float speech[],
+                       int32_t lpita,
+                       const int32_t tau[], 
+                       int32_t ltau,
+                       int32_t maxlag,
+                       float amdf[],
+                       int32_t *minptr,
+                       int32_t *maxptr)
+{
+    float sum;
+    int i;
+    int j;
+    int n1;
+    int n2;
+
+    *minptr = 0;
+    *maxptr = 0;
+    for (i = 0;  i < ltau;  i++)
+    {
+        n1 = (maxlag - tau[i])/2 + 1;
+        n2 = n1 + lpita - 1;
+        sum = 0.0f;
+        for (j = n1;  j <= n2;  j += 4)
+            sum += fabsf(speech[j - 1] - speech[j + tau[i] - 1]);
+        amdf[i] = sum;
+        if (amdf[i] < amdf[*minptr])
+            *minptr = i;
+        if (amdf[i] > amdf[*maxptr])
+            *maxptr = i;
+    }
+}
+/*- End of function --------------------------------------------------------*/
+
+static void eval_highres_amdf(float speech[],
+                              int32_t lpita,
+                              const int32_t tau[], 
+                              int32_t ltau,
+                              float amdf[],
+                              int32_t *minptr,
+                              int32_t *maxptr,
+                              int32_t *mintau)
+{
+    float amdf2[6];
+    int32_t tau2[6];
+    int32_t minp2;
+    int32_t ltau2;
+    int32_t maxp2;
+    int32_t minamd;
+    int i;
+    int i2;
+    int ptr;
+
+    /* Compute full AMDF using log spaced lags, find coarse minimum */
+    eval_amdf(speech, lpita, tau, ltau, tau[ltau - 1], amdf, minptr, maxptr);
+    *mintau = tau[*minptr];
+    minamd = (int32_t) amdf[*minptr];
+
+    /* Build table containing all lags within +/- 3 of the AMDF minimum,
+       excluding all that have already been computed */
+    ltau2 = 0;
+    ptr = *minptr - 2;
+    i2 = min(*mintau + 4, tau[ltau - 1]);
+    for (i = max(*mintau - 3, 41);  i < i2;  i++)
+    {
+        while (tau[ptr] < i)
+            ptr++;
+        if (tau[ptr] != i)
+            tau2[ltau2++] = i;
+    }
+    /* Compute AMDF of the new lags, if there are any, and choose one
+       if it is better than the coarse minimum */
+    if (ltau2 > 0)
+    {
+        eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
+        if (amdf2[minp2] < (float) minamd)
+        {
+            *mintau = tau2[minp2];
+            minamd = (int32_t) amdf2[minp2];
+        }
+    }
+    /* Check one octave up, if there are any lags not yet computed */
+    if (*mintau >= 80)
+    {
+        i = *mintau/2;
+        if ((i & 1) == 0)
+        {
+            ltau2 = 2;
+            tau2[0] = i - 1;
+            tau2[1] = i + 1;
+        }
+        else
+        {
+            ltau2 = 1;
+            tau2[0] = i;
+        }
+        eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
+        if (amdf2[minp2] < (float) minamd)
+        {
+            *mintau = tau2[minp2];
+            minamd = (int32_t) amdf2[minp2];
+            *minptr -= 20;
+        }
+    }
+    /* Force minimum of the AMDF array to the high resolution minimum */
+    amdf[*minptr] = (float) minamd;
+    /* Find maximum of AMDF within 1/2 octave of minimum */
+    *maxptr = max(*minptr - 5, 0);
+    i2 = min(*minptr + 6, ltau);
+    for (i = *maxptr;  i < i2;  i++)
+    {
+        if (amdf[i] > amdf[*maxptr])
+            *maxptr = i;
+    }
+}
+/*- End of function --------------------------------------------------------*/
+
+static void dynamic_pitch_tracking(lpc10_encode_state_t *s,
+                                   float amdf[],
+                                   int32_t ltau,
+                                   int32_t *minptr,
+                                   int32_t voice,
+                                   int32_t *pitch,
+                                   int32_t *midx)
+{
+    int32_t pbar;
+    float sbar;
+    int32_t path[2];
+    int32_t i;
+    int32_t j;
+    float alpha;
+    float minsc;
+    float maxsc;
+
+    /* Calculate the confidence factor ALPHA, used as a threshold slope in */
+    /* SEESAW.  If unvoiced, set high slope so that every point in P array */
+    /*is marked as a potential pitch frequency.  A scaled up version (ALPHAX )*/
+    /* is used to maintain arithmetic precision. */
+    if (voice == 1)
+        s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f;
+    else
+        s->alphax *= 0.984375f;
+    alpha = s->alphax/16;
+    if (voice == 0  &&  s->alphax < 128.0f)
+        alpha = 8.0f;
+    /* SEESAW: Construct a pitch pointer array and intermediate winner function */
+    /* Left to right pass: */
+    s->p[s->ipoint][0] = 1;
+    pbar = 1;
+    sbar = s->s[0];
+    for (i = 0;  i < ltau;  i++)
+    {
+        sbar += alpha;
+        if (sbar < s->s[i])
+        {
+            s->s[i] = sbar;
+        }
+        else
+        {
+            pbar = i + 1;
+            sbar = s->s[i];
+        }
+        s->p[s->ipoint][i] = pbar;
+    }
+    /* Right to left pass: */
+    sbar = s->s[pbar - 1];
+    for (i = pbar - 2;  i >= 0;  i--)
+    {
+        sbar += alpha;
+        if (sbar < s->s[i])
+        {
+            s->s[i] = sbar;
+            s->p[s->ipoint][i] = pbar;
+        }
+        else
+        {
+            pbar = s->p[s->ipoint][i];
+            i = pbar - 1;
+            sbar = s->s[i];
+        }
+    }
+    /* Update S using AMDF */
+    /* Find maximum, minimum, and location of minimum */
+    s->s[0] += amdf[0]/2;
+    minsc = s->s[0];
+    maxsc = minsc;
+    *midx = 1;
+    for (i = 1;  i < ltau;  i++)
+    {
+        s->s[i] += amdf[i]/2;
+        if (s->s[i] > maxsc)
+            maxsc = s->s[i];
+        if (s->s[i] < minsc)
+        {
+            *midx = i + 1;
+            minsc = s->s[i];
+        }
+    }
+    /* Subtract MINSC from S to prevent overflow */
+    for (i = 0;  i < ltau;  i++)
+        s->s[i] -= minsc;
+    maxsc -= minsc;
+    /* Use higher octave pitch if significant null there */
+    j = 0;
+    for (i = 20;  i <= 40;  i += 10)
+    {
+        if (*midx > i)
+        {
+            if (s->s[*midx - i - 1] < maxsc / 4)
+                j = i;
+        }
+    }
+    *midx -= j;
+    /* TRACE: look back two frames to find minimum cost pitch estimate */
+    *pitch = *midx;
+    for (i = 0, j = s->ipoint;  i < 2;  i++, j++)
+    {
+        *pitch = s->p[j & 1][*pitch - 1];
+        path[i] = *pitch;
+    }
+
+    /* The following statement subtracts one from IPOINT, mod DEPTH.  I */
+    /* think the author chose to add DEPTH-1, instead of subtracting 1, */
+    /* because then it will work even if MOD doesn't work as desired on */
+    /* negative arguments. */
+    s->ipoint = (s->ipoint + 1) & 1;
+}
+/*- End of function --------------------------------------------------------*/
+
+/* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */
+static void onset(lpc10_encode_state_t *s,
+                   float *pebuf,
+                   int32_t osbuf[],
+                   int32_t *osptr,
+                   int32_t oslen,
+                   int32_t sbufl,
+                   int32_t sbufh,
+                   int32_t lframe)
+{
+    int32_t i;
+    float r1;
+    float l2sum2;
+
+    pebuf -= sbufl;
+
+    if (s->hyst)
+    	s->lasti -= lframe;
+    for (i = sbufh - lframe + 1;  i <= sbufh;  i++)
+    {
+        /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */
+        s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f;
+        /* Computing 2nd power */
+        r1 = pebuf[i - 1];
+        s->d__ = (r1*r1 + s->d__*63.0f)/64.0f;
+        if (s->d__ != 0.0f)
+        {
+            if (fabsf(s->n) > s->d__)
+                s->fpc = r_sign(1.0f, s->n);
+            else
+                s->fpc = s->n/s->d__;
+        }
+        /* Filter FPC */
+        l2sum2 = s->l2buf[s->l2ptr1 - 1];
+        s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc;
+        s->l2buf[s->l2ptr2 - 1] = s->l2sum1;
+        s->l2buf[s->l2ptr1 - 1] = s->fpc;
+        s->l2ptr1 = (s->l2ptr1 & 0xF) + 1;
+        s->l2ptr2 = (s->l2ptr2 & 0xF) + 1;
+        if (fabsf(s->l2sum1 - l2sum2) > 1.7f)
+        {
+            if (!s->hyst)
+            {
+                /* Ignore if buffer full */
+                if (*osptr <= oslen)
+                {
+                    osbuf[*osptr - 1] = i - 9;
+                    (*osptr)++;
+                }
+                s->hyst = TRUE;
+            }
+            s->lasti = i;
+            /* After one onset detection, at least OSHYST sample times must go */
+            /* by before another is allowed to occur. */
+        }
+        else if (s->hyst  &&  i - s->lasti >= 10)
+        {
+            s->hyst = FALSE;
+        }
+    }
+}
+/*- End of function --------------------------------------------------------*/
+
+/* Load a covariance matrix. */
+static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[])
+{
+    int32_t start;
+    int i;
+    int r;
+
+    start = awins + order;
+    for (r = 1;  r <= order;  r++)
+    {
+        phi[r - 1] = 0.0f;
+        for (i = start;  i <= awinf;  i++)
+            phi[r - 1] += speech[i - 2]*speech[i - r - 1];
+    }
+
+    /* Load last element of vector PSI */
+    psi[order - 1] = 0.0f;
+    for (i = start - 1;  i < awinf;  i++)
+        psi[order - 1] += speech[i]*speech[i - order];
+    /* End correct to get additional columns of phi */
+    for (r = 1;  r < order;  r++)
+    {
+        for (i = 1;  i <= r;  i++)
+        {
+            phi[i*order + r] = phi[(i - 1)*order + r - 1]
+                             - speech[awinf - (r + 1)]*speech[awinf - (i + 1)]
+                             + speech[start - (r + 2)]*speech[start - (i + 2)];
+        }
+    }
+    /* End correct to get additional elements of PSI */
+    for (i = 0;  i < order - 1;  i++)
+    {
+        psi[i] = phi[i + 1]
+               - speech[start - 2]*speech[start - i - 3]
+               + speech[awinf - 1]*speech[awinf - i - 2];
+    }
+}
+/*- End of function --------------------------------------------------------*/
+
+/* Preemphasize speech with a single-zero filter. */
+/* (When coef = .9375, preemphasis is as in LPC43.) */
+static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z)
+{
+    float temp;
+    int i;
+
+    for (i = 0;  i < nsamp;  i++)
+    {
+        temp = inbuf[i] - coeff*z;
+        z = inbuf[i];
+        pebuf[i] = temp;
+    }
+    return z;
+}
+/*- End of function --------------------------------------------------------*/
+
+/* Invert a covariance matrix using Choleski decomposition method. */
+static void invert(int32_t order, float phi[], float psi[], float rc[])
+{
+    float r1;
+    int32_t i;
+    int32_t j;
+    int32_t k;
+    float v[10][10];
+
+    for (j = 0;  j < order;  j++)
+    {
+        for (i = j;  i < order;  i++)
+            v[j][i] = phi[i + j*order];
+        for (k = 0;  k < j;  k++)
+        {
+            r1 = v[k][j]*v[k][k];
+            for (i = j;  i <= order;  i++)
+                v[j][i] -= v[k][i]*r1;
+        }
+        /* Compute intermediate results, which are similar to RC's */
+        if (fabsf(v[j][j]) < 1.0e-10f)
+        {
+            for (i = j;  i < order;  i++)
+                rc[i] = 0.0f;
+            return;
+        }
+        rc[j] = psi[j];
+        for (k = 0;  k < j;  k++)
+            rc[j] -= rc[k]*v[k][j];
+        v[j][j] = 1.0f/v[j][j];
+        rc[j] *= v[j][j];
+        r1 = min(rc[j], 0.999f);
+        rc[j] = max(r1, -0.999f);
+    }
+}
+/*- End of function --------------------------------------------------------*/
+
+/* Check RC's, repeat previous frame's RC's if unstable */
+static int rcchk(int order, float rc1f[], float rc2f[])
+{
+    int i;
+
+    for (i = 0;  i < order;  i++)
+    {
+        if (fabsf(rc2f[i]) > 0.99f)
+        {
+            for (i = 0;  i < order;  i++)
+                rc2f[i] = rc1f[i];
+            break;
+        }
+    }
+    return 0;
+}
+/*- End of function --------------------------------------------------------*/
+
+static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp)
+{
+    int32_t j;
+    float t;
+
+    /* 31 point equiripple FIR LPF */
+    /* Linear phase, delay = 15 samples */
+    /* Passband:  ripple = 0.25 dB, cutoff =  800 Hz */
+    /* Stopband:  atten. =  40. dB, cutoff = 1240 Hz */
+
+    for (j = len - nsamp;  j < len;  j++)
+    {
+        t = (inbuf[j] + inbuf[j - 30]) * -0.0097201988f;
+        t += (inbuf[j - 1] + inbuf[j - 29]) * -0.0105179986f;
+        t += (inbuf[j - 2] + inbuf[j - 28]) * -0.0083479648f;
+        t += (inbuf[j - 3] + inbuf[j - 27]) * 5.860774e-4f;
+        t += (inbuf[j - 4] + inbuf[j - 26]) * 0.0130892089f;
+        t += (inbuf[j - 5] + inbuf[j - 25]) * 0.0217052232f;
+        t += (inbuf[j - 6] + inbuf[j - 24]) * 0.0184161253f;
+        t += (inbuf[j - 7] + inbuf[j - 23]) * 3.39723e-4f;
+        t += (inbuf[j - 8] + inbuf[j - 22]) * -0.0260797087f;
+        t += (inbuf[j - 9] + inbuf[j - 21]) * -0.0455563702f;
+        t += (inbuf[j - 10] + inbuf[j - 20]) * -0.040306855f;
+        t += (inbuf[j - 11] + inbuf[j - 19]) * 5.029835e-4f;
+        t += (inbuf[j - 12] + inbuf[j - 18]) * 0.0729262903f;
+        t += (inbuf[j - 13] + inbuf[j - 17]) * 0.1572008878f;
+        t += (inbuf[j - 14] + inbuf[j - 16]) * 0.2247288674f;
+        t += inbuf[j - 15] * 0.250535965f;
+        lpbuf[j] = t;
+    }
+}
+/*- End of function --------------------------------------------------------*/
+
+/* 2nd order inverse filter, speech is decimated 4:1 */
+static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[])
+{
+    int32_t i;
+    int32_t j;
+    int32_t k;
+    float r[3];
+    float pc1;
+    float pc2;
+
+    /* Calculate autocorrelations */
+    for (i = 1;  i <= 3;  i++)
+    {
+        r[i - 1] = 0.0f;
+        k = (i - 1) << 2;
+        for (j = (i << 2) + len - nsamp;  j <= len;  j += 2)
+            r[i - 1] += lpbuf[j - 1]*lpbuf[j - k - 1];
+    }
+    /* Calculate predictor coefficients */
+    pc1 = 0.0f;
+    pc2 = 0.0f;
+    ivrc[0] = 0.0f;
+    ivrc[1] = 0.0f;
+    if (r[0] > 1.0e-10f)
+    {
+        ivrc[0] = r[1]/r[0];
+        ivrc[1] = (r[2] - ivrc[0]*r[1])/(r[0] - ivrc[0]*r[1]);
+        pc1 = ivrc[0] - ivrc[0]*ivrc[1];
+        pc2 = ivrc[1];
+    }
+    /* Inverse filter LPBUF into IVBUF */
+    for (i = len - nsamp;  i < len;  i++)
+        ivbuf[i] = lpbuf[i] - pc1*lpbuf[i - 4] - pc2*lpbuf[i - 8];
+}
+/*- End of function --------------------------------------------------------*/
+
+void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[])
+{
+    static const int32_t tau[60] =
+    {
+        20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 
+        35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
+        60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96,
+        100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
+        148, 152, 156
+    };
+    static const int32_t buflim[4] =
+    {
+        181, 720, 25, 720
+    };
+    static const float precoef = 0.9375f;
+
+    float amdf[60];
+    float abuf[156];
+    float ivrc[2];
+    float temp;
+    float phi[100]    /* was [10][10] */;
+    float psi[10];
+    int32_t half;
+    int32_t midx;
+    int32_t ewin[3][2];
+    int32_t i;
+    int32_t j;
+    int32_t lanal;
+    int32_t ipitch;
+    int32_t mintau;
+    int32_t minptr;
+    int32_t maxptr;
+
+    /* Calculations are done on future frame due to requirements
+       of the pitch tracker.  Delay RMS and RC's 2 frames to give
+       current frame parameters on return. */
+
+    for (i = 0;  i <= 720 - LPC10_SAMPLES_PER_FRAME - 181;  i++)
+    {
+        s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i];
+        s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i];
+    }
+    for (i = 0;  i <= 540 - LPC10_SAMPLES_PER_FRAME - 229;  i++)
+        s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i];
+    for (i = 0;  i <= 720 - LPC10_SAMPLES_PER_FRAME - 25;  i++)
+        s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i];
+    for (i = 0, j = 0;  i < s->osptr - 1;  i++)
+    {
+        if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME)
+            s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME;
+    }
+    s->osptr = j + 1;
+    s->voibuf[0][0] = s->voibuf[1][0];
+    s->voibuf[0][1] = s->voibuf[1][1];
+    for (i = 0;  i < 2;  i++)
+    {
+        s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
+        s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
+        s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
+        s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
+        s->obound[i] = s->obound[i + 1];
+        s->voibuf[i + 1][0] = s->voibuf[i + 2][0];
+        s->voibuf[i + 1][1] = s->voibuf[i + 2][1];
+        s->rmsbuf[i] = s->rmsbuf[i + 1];
+        for (j = 0;  j < LPC10_ORDER;  j++)
+            s->rcbuf[i][j] = s->rcbuf[i + 1][j];
+    }
+    /* If the average value in the frame was over 1/4096 (after current
+       BIAS correction), then subtract that much more from samples in the
+       next frame.  If the average value in the frame was under
+       -1/4096, add 1/4096 more to samples in next frame.  In all other
+       cases, keep BIAS the same. */
+    temp = 0.0f;
+    for (i = 0;  i < LPC10_SAMPLES_PER_FRAME;  i++)
+    {
+        s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias;
+        temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i];
+    }
+    if (temp > (float) LPC10_SAMPLES_PER_FRAME)
+        s->bias++;
+    else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME))
+        s->bias--;
+    /* Place voicing window */
+    i = 721 - LPC10_SAMPLES_PER_FRAME;
+    s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre);
+    onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME);
+
+    lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462);
+    /* The Pitch Extraction algorithm estimates the pitch for a frame
+       of speech by locating the minimum of the average magnitude difference
+       function (AMDF).  The AMDF operates on low-pass, inverse filtered
+       speech.  (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR
+       filter and the inverse filter is a 2nd-order LPC filter.)  The pitch
+       estimate is later refined by dynamic tracking.  However, since some
+       of the tracking parameters are a function of the voicing decisions,
+       a voicing decision must precede the final pitch estimation. */
+    /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */
+    /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720
+       of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH
+       = 720 of LPBUF. */
+    lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME);
+    /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540
+       of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through
+       PWINH = 540 of IVBUF. */
+    ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc);
+    /* eval_highres_amdf reads indices PWINL = 229 through
+       (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes
+       indices 1 through LTAU = 60 of AMDF. */
+    eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau);
+    /* Voicing decisions are made for each half frame of input speech.
+       An initial voicing classification is made for each half of the
+       analysis frame, and the voicing decisions for the present frame
+       are finalized.  See subroutine VOICIN. */
+    /*        The voicing detector (VOICIN) classifies the input signal as
+       unvoiced (including silence) or voiced using the AMDF windowed
+       maximum-to-minimum ratio, the zero crossing rate, energy measures,
+       reflection coefficients, and prediction gains. */
+    /*        The pitch and voicing rules apply smoothing and isolated
+       corrections to the pitch and voicing estimates and, in the process,
+       introduce two frames of delay into the corrected pitch estimates and
+       voicing decisions. */
+    for (half = 0;  half < 2;  half++)
+    {
+        lpc10_voicing(s,
+                      &s->vwin[2][0],
+                      s->inbuf,
+                      s->lpbuf,
+                      buflim,
+                      half,
+                      &amdf[minptr],
+                      &amdf[maxptr],
+                      &mintau,
+                      ivrc,
+                      s->obound);
+    }
+    /* Find the minimum cost pitch decision over several frames,
+       given the current voicing decision and the AMDF array */
+    minptr++;
+    dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx);
+    ipitch = tau[midx - 1];
+    /* Place spectrum analysis and energy windows */
+    lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156);
+    /* Remove short term DC bias over the analysis window. */
+    lanal = s->awin[2][1] + 1 - s->awin[2][0];
+    remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf);
+    /* Compute RMS over integer number of pitch periods within the analysis window. */
+    /* Note that in a hardware implementation this computation may be
+       simplified by using diagonal elements of phi computed by mload(). */
+    s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1);
+    /* Matrix load and invert, check RC's for stability */
+    mload(LPC10_ORDER, 1, lanal, abuf, phi, psi);
+    invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]);
+    rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]);
+    /* Set return parameters */
+    voice[0] = s->voibuf[1][0];
+    voice[1] = s->voibuf[1][1];
+    *rms = s->rmsbuf[0];
+    for (i = 0;  i < LPC10_ORDER;  i++)
+        rc[i] = s->rcbuf[0][i];
+}
+/*- End of function --------------------------------------------------------*/
+/*- End of file ------------------------------------------------------------*/
author	Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date	Fri, 25 Jun 2010 16:00:21 +0200
parents
children