comparison spandsp-0.0.3/spandsp-0.0.3/src/lpc10_analyse.c @ 5:f762bf195c4b

import spandsp-0.0.3
author Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date Fri, 25 Jun 2010 16:00:21 +0200
parents
children
comparison
equal deleted inserted replaced
4:26cd8f1ef0b1 5:f762bf195c4b
1 /*
2 * SpanDSP - a series of DSP components for telephony
3 *
4 * lpc10_analyse.c - LPC10 low bit rate speech codec.
5 *
6 * Written by Steve Underwood <steveu@coppice.org>
7 *
8 * Copyright (C) 2006 Steve Underwood
9 *
10 * All rights reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License version 2, as
14 * published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * This code is based on the U.S. Department of Defense reference
26 * implementation of the LPC-10 2400 bps Voice Coder. They do not
27 * exert copyright claims on their code, and it may be freely used.
28 *
29 * $Id: lpc10_analyse.c,v 1.12 2006/11/30 15:41:47 steveu Exp $
30 */
31
32 #ifdef HAVE_CONFIG_H
33 #include <config.h>
34 #endif
35
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <inttypes.h>
39 #include <memory.h>
40 #if defined(HAVE_TGMATH_H)
41 #include <tgmath.h>
42 #endif
43 #if defined(HAVE_MATH_H)
44 #include <math.h>
45 #endif
46
47 #include "spandsp/telephony.h"
48 #include "spandsp/dc_restore.h"
49 #include "spandsp/lpc10.h"
50
51 #include "lpc10_encdecs.h"
52
53 static __inline__ float energyf(float amp[], int len)
54 {
55 int i;
56 float rms;
57
58 rms = 0.0f;
59 for (i = 0; i < len; i++)
60 rms += amp[i]*amp[i];
61 rms = sqrtf(rms/len);
62 return rms;
63 }
64 /*- End of function --------------------------------------------------------*/
65
66 static void remove_dc_bias(float speech[], int len, float sigout[])
67 {
68 float bias;
69 int i;
70
71 bias = 0.0f;
72 for (i = 0; i < len; i++)
73 bias += speech[i];
74 bias /= len;
75 for (i = 0; i < len; i++)
76 sigout[i] = speech[i] - bias;
77 }
78 /*- End of function --------------------------------------------------------*/
79
80 static void eval_amdf(float speech[],
81 int32_t lpita,
82 const int32_t tau[],
83 int32_t ltau,
84 int32_t maxlag,
85 float amdf[],
86 int32_t *minptr,
87 int32_t *maxptr)
88 {
89 float sum;
90 int i;
91 int j;
92 int n1;
93 int n2;
94
95 *minptr = 0;
96 *maxptr = 0;
97 for (i = 0; i < ltau; i++)
98 {
99 n1 = (maxlag - tau[i])/2 + 1;
100 n2 = n1 + lpita - 1;
101 sum = 0.0f;
102 for (j = n1; j <= n2; j += 4)
103 sum += fabsf(speech[j - 1] - speech[j + tau[i] - 1]);
104 amdf[i] = sum;
105 if (amdf[i] < amdf[*minptr])
106 *minptr = i;
107 if (amdf[i] > amdf[*maxptr])
108 *maxptr = i;
109 }
110 }
111 /*- End of function --------------------------------------------------------*/
112
113 static void eval_highres_amdf(float speech[],
114 int32_t lpita,
115 const int32_t tau[],
116 int32_t ltau,
117 float amdf[],
118 int32_t *minptr,
119 int32_t *maxptr,
120 int32_t *mintau)
121 {
122 float amdf2[6];
123 int32_t tau2[6];
124 int32_t minp2;
125 int32_t ltau2;
126 int32_t maxp2;
127 int32_t minamd;
128 int i;
129 int i2;
130 int ptr;
131
132 /* Compute full AMDF using log spaced lags, find coarse minimum */
133 eval_amdf(speech, lpita, tau, ltau, tau[ltau - 1], amdf, minptr, maxptr);
134 *mintau = tau[*minptr];
135 minamd = (int32_t) amdf[*minptr];
136
137 /* Build table containing all lags within +/- 3 of the AMDF minimum,
138 excluding all that have already been computed */
139 ltau2 = 0;
140 ptr = *minptr - 2;
141 i2 = min(*mintau + 4, tau[ltau - 1]);
142 for (i = max(*mintau - 3, 41); i < i2; i++)
143 {
144 while (tau[ptr] < i)
145 ptr++;
146 if (tau[ptr] != i)
147 tau2[ltau2++] = i;
148 }
149 /* Compute AMDF of the new lags, if there are any, and choose one
150 if it is better than the coarse minimum */
151 if (ltau2 > 0)
152 {
153 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
154 if (amdf2[minp2] < (float) minamd)
155 {
156 *mintau = tau2[minp2];
157 minamd = (int32_t) amdf2[minp2];
158 }
159 }
160 /* Check one octave up, if there are any lags not yet computed */
161 if (*mintau >= 80)
162 {
163 i = *mintau/2;
164 if ((i & 1) == 0)
165 {
166 ltau2 = 2;
167 tau2[0] = i - 1;
168 tau2[1] = i + 1;
169 }
170 else
171 {
172 ltau2 = 1;
173 tau2[0] = i;
174 }
175 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
176 if (amdf2[minp2] < (float) minamd)
177 {
178 *mintau = tau2[minp2];
179 minamd = (int32_t) amdf2[minp2];
180 *minptr -= 20;
181 }
182 }
183 /* Force minimum of the AMDF array to the high resolution minimum */
184 amdf[*minptr] = (float) minamd;
185 /* Find maximum of AMDF within 1/2 octave of minimum */
186 *maxptr = max(*minptr - 5, 0);
187 i2 = min(*minptr + 6, ltau);
188 for (i = *maxptr; i < i2; i++)
189 {
190 if (amdf[i] > amdf[*maxptr])
191 *maxptr = i;
192 }
193 }
194 /*- End of function --------------------------------------------------------*/
195
196 static void dynamic_pitch_tracking(lpc10_encode_state_t *s,
197 float amdf[],
198 int32_t ltau,
199 int32_t *minptr,
200 int32_t voice,
201 int32_t *pitch,
202 int32_t *midx)
203 {
204 int32_t pbar;
205 float sbar;
206 int32_t path[2];
207 int32_t i;
208 int32_t j;
209 float alpha;
210 float minsc;
211 float maxsc;
212
213 /* Calculate the confidence factor ALPHA, used as a threshold slope in */
214 /* SEESAW. If unvoiced, set high slope so that every point in P array */
215 /*is marked as a potential pitch frequency. A scaled up version (ALPHAX )*/
216 /* is used to maintain arithmetic precision. */
217 if (voice == 1)
218 s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f;
219 else
220 s->alphax *= 0.984375f;
221 alpha = s->alphax/16;
222 if (voice == 0 && s->alphax < 128.0f)
223 alpha = 8.0f;
224 /* SEESAW: Construct a pitch pointer array and intermediate winner function */
225 /* Left to right pass: */
226 s->p[s->ipoint][0] = 1;
227 pbar = 1;
228 sbar = s->s[0];
229 for (i = 0; i < ltau; i++)
230 {
231 sbar += alpha;
232 if (sbar < s->s[i])
233 {
234 s->s[i] = sbar;
235 }
236 else
237 {
238 pbar = i + 1;
239 sbar = s->s[i];
240 }
241 s->p[s->ipoint][i] = pbar;
242 }
243 /* Right to left pass: */
244 sbar = s->s[pbar - 1];
245 for (i = pbar - 2; i >= 0; i--)
246 {
247 sbar += alpha;
248 if (sbar < s->s[i])
249 {
250 s->s[i] = sbar;
251 s->p[s->ipoint][i] = pbar;
252 }
253 else
254 {
255 pbar = s->p[s->ipoint][i];
256 i = pbar - 1;
257 sbar = s->s[i];
258 }
259 }
260 /* Update S using AMDF */
261 /* Find maximum, minimum, and location of minimum */
262 s->s[0] += amdf[0]/2;
263 minsc = s->s[0];
264 maxsc = minsc;
265 *midx = 1;
266 for (i = 1; i < ltau; i++)
267 {
268 s->s[i] += amdf[i]/2;
269 if (s->s[i] > maxsc)
270 maxsc = s->s[i];
271 if (s->s[i] < minsc)
272 {
273 *midx = i + 1;
274 minsc = s->s[i];
275 }
276 }
277 /* Subtract MINSC from S to prevent overflow */
278 for (i = 0; i < ltau; i++)
279 s->s[i] -= minsc;
280 maxsc -= minsc;
281 /* Use higher octave pitch if significant null there */
282 j = 0;
283 for (i = 20; i <= 40; i += 10)
284 {
285 if (*midx > i)
286 {
287 if (s->s[*midx - i - 1] < maxsc / 4)
288 j = i;
289 }
290 }
291 *midx -= j;
292 /* TRACE: look back two frames to find minimum cost pitch estimate */
293 *pitch = *midx;
294 for (i = 0, j = s->ipoint; i < 2; i++, j++)
295 {
296 *pitch = s->p[j & 1][*pitch - 1];
297 path[i] = *pitch;
298 }
299
300 /* The following statement subtracts one from IPOINT, mod DEPTH. I */
301 /* think the author chose to add DEPTH-1, instead of subtracting 1, */
302 /* because then it will work even if MOD doesn't work as desired on */
303 /* negative arguments. */
304 s->ipoint = (s->ipoint + 1) & 1;
305 }
306 /*- End of function --------------------------------------------------------*/
307
308 /* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */
309 static void onset(lpc10_encode_state_t *s,
310 float *pebuf,
311 int32_t osbuf[],
312 int32_t *osptr,
313 int32_t oslen,
314 int32_t sbufl,
315 int32_t sbufh,
316 int32_t lframe)
317 {
318 int32_t i;
319 float r1;
320 float l2sum2;
321
322 pebuf -= sbufl;
323
324 if (s->hyst)
325 s->lasti -= lframe;
326 for (i = sbufh - lframe + 1; i <= sbufh; i++)
327 {
328 /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */
329 s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f;
330 /* Computing 2nd power */
331 r1 = pebuf[i - 1];
332 s->d__ = (r1*r1 + s->d__*63.0f)/64.0f;
333 if (s->d__ != 0.0f)
334 {
335 if (fabsf(s->n) > s->d__)
336 s->fpc = r_sign(1.0f, s->n);
337 else
338 s->fpc = s->n/s->d__;
339 }
340 /* Filter FPC */
341 l2sum2 = s->l2buf[s->l2ptr1 - 1];
342 s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc;
343 s->l2buf[s->l2ptr2 - 1] = s->l2sum1;
344 s->l2buf[s->l2ptr1 - 1] = s->fpc;
345 s->l2ptr1 = (s->l2ptr1 & 0xF) + 1;
346 s->l2ptr2 = (s->l2ptr2 & 0xF) + 1;
347 if (fabsf(s->l2sum1 - l2sum2) > 1.7f)
348 {
349 if (!s->hyst)
350 {
351 /* Ignore if buffer full */
352 if (*osptr <= oslen)
353 {
354 osbuf[*osptr - 1] = i - 9;
355 (*osptr)++;
356 }
357 s->hyst = TRUE;
358 }
359 s->lasti = i;
360 /* After one onset detection, at least OSHYST sample times must go */
361 /* by before another is allowed to occur. */
362 }
363 else if (s->hyst && i - s->lasti >= 10)
364 {
365 s->hyst = FALSE;
366 }
367 }
368 }
369 /*- End of function --------------------------------------------------------*/
370
371 /* Load a covariance matrix. */
372 static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[])
373 {
374 int32_t start;
375 int i;
376 int r;
377
378 start = awins + order;
379 for (r = 1; r <= order; r++)
380 {
381 phi[r - 1] = 0.0f;
382 for (i = start; i <= awinf; i++)
383 phi[r - 1] += speech[i - 2]*speech[i - r - 1];
384 }
385
386 /* Load last element of vector PSI */
387 psi[order - 1] = 0.0f;
388 for (i = start - 1; i < awinf; i++)
389 psi[order - 1] += speech[i]*speech[i - order];
390 /* End correct to get additional columns of phi */
391 for (r = 1; r < order; r++)
392 {
393 for (i = 1; i <= r; i++)
394 {
395 phi[i*order + r] = phi[(i - 1)*order + r - 1]
396 - speech[awinf - (r + 1)]*speech[awinf - (i + 1)]
397 + speech[start - (r + 2)]*speech[start - (i + 2)];
398 }
399 }
400 /* End correct to get additional elements of PSI */
401 for (i = 0; i < order - 1; i++)
402 {
403 psi[i] = phi[i + 1]
404 - speech[start - 2]*speech[start - i - 3]
405 + speech[awinf - 1]*speech[awinf - i - 2];
406 }
407 }
408 /*- End of function --------------------------------------------------------*/
409
410 /* Preemphasize speech with a single-zero filter. */
411 /* (When coef = .9375, preemphasis is as in LPC43.) */
412 static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z)
413 {
414 float temp;
415 int i;
416
417 for (i = 0; i < nsamp; i++)
418 {
419 temp = inbuf[i] - coeff*z;
420 z = inbuf[i];
421 pebuf[i] = temp;
422 }
423 return z;
424 }
425 /*- End of function --------------------------------------------------------*/
426
427 /* Invert a covariance matrix using Choleski decomposition method. */
428 static void invert(int32_t order, float phi[], float psi[], float rc[])
429 {
430 float r1;
431 int32_t i;
432 int32_t j;
433 int32_t k;
434 float v[10][10];
435
436 for (j = 0; j < order; j++)
437 {
438 for (i = j; i < order; i++)
439 v[j][i] = phi[i + j*order];
440 for (k = 0; k < j; k++)
441 {
442 r1 = v[k][j]*v[k][k];
443 for (i = j; i <= order; i++)
444 v[j][i] -= v[k][i]*r1;
445 }
446 /* Compute intermediate results, which are similar to RC's */
447 if (fabsf(v[j][j]) < 1.0e-10f)
448 {
449 for (i = j; i < order; i++)
450 rc[i] = 0.0f;
451 return;
452 }
453 rc[j] = psi[j];
454 for (k = 0; k < j; k++)
455 rc[j] -= rc[k]*v[k][j];
456 v[j][j] = 1.0f/v[j][j];
457 rc[j] *= v[j][j];
458 r1 = min(rc[j], 0.999f);
459 rc[j] = max(r1, -0.999f);
460 }
461 }
462 /*- End of function --------------------------------------------------------*/
463
464 /* Check RC's, repeat previous frame's RC's if unstable */
465 static int rcchk(int order, float rc1f[], float rc2f[])
466 {
467 int i;
468
469 for (i = 0; i < order; i++)
470 {
471 if (fabsf(rc2f[i]) > 0.99f)
472 {
473 for (i = 0; i < order; i++)
474 rc2f[i] = rc1f[i];
475 break;
476 }
477 }
478 return 0;
479 }
480 /*- End of function --------------------------------------------------------*/
481
482 static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp)
483 {
484 int32_t j;
485 float t;
486
487 /* 31 point equiripple FIR LPF */
488 /* Linear phase, delay = 15 samples */
489 /* Passband: ripple = 0.25 dB, cutoff = 800 Hz */
490 /* Stopband: atten. = 40. dB, cutoff = 1240 Hz */
491
492 for (j = len - nsamp; j < len; j++)
493 {
494 t = (inbuf[j] + inbuf[j - 30]) * -0.0097201988f;
495 t += (inbuf[j - 1] + inbuf[j - 29]) * -0.0105179986f;
496 t += (inbuf[j - 2] + inbuf[j - 28]) * -0.0083479648f;
497 t += (inbuf[j - 3] + inbuf[j - 27]) * 5.860774e-4f;
498 t += (inbuf[j - 4] + inbuf[j - 26]) * 0.0130892089f;
499 t += (inbuf[j - 5] + inbuf[j - 25]) * 0.0217052232f;
500 t += (inbuf[j - 6] + inbuf[j - 24]) * 0.0184161253f;
501 t += (inbuf[j - 7] + inbuf[j - 23]) * 3.39723e-4f;
502 t += (inbuf[j - 8] + inbuf[j - 22]) * -0.0260797087f;
503 t += (inbuf[j - 9] + inbuf[j - 21]) * -0.0455563702f;
504 t += (inbuf[j - 10] + inbuf[j - 20]) * -0.040306855f;
505 t += (inbuf[j - 11] + inbuf[j - 19]) * 5.029835e-4f;
506 t += (inbuf[j - 12] + inbuf[j - 18]) * 0.0729262903f;
507 t += (inbuf[j - 13] + inbuf[j - 17]) * 0.1572008878f;
508 t += (inbuf[j - 14] + inbuf[j - 16]) * 0.2247288674f;
509 t += inbuf[j - 15] * 0.250535965f;
510 lpbuf[j] = t;
511 }
512 }
513 /*- End of function --------------------------------------------------------*/
514
515 /* 2nd order inverse filter, speech is decimated 4:1 */
516 static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[])
517 {
518 int32_t i;
519 int32_t j;
520 int32_t k;
521 float r[3];
522 float pc1;
523 float pc2;
524
525 /* Calculate autocorrelations */
526 for (i = 1; i <= 3; i++)
527 {
528 r[i - 1] = 0.0f;
529 k = (i - 1) << 2;
530 for (j = (i << 2) + len - nsamp; j <= len; j += 2)
531 r[i - 1] += lpbuf[j - 1]*lpbuf[j - k - 1];
532 }
533 /* Calculate predictor coefficients */
534 pc1 = 0.0f;
535 pc2 = 0.0f;
536 ivrc[0] = 0.0f;
537 ivrc[1] = 0.0f;
538 if (r[0] > 1.0e-10f)
539 {
540 ivrc[0] = r[1]/r[0];
541 ivrc[1] = (r[2] - ivrc[0]*r[1])/(r[0] - ivrc[0]*r[1]);
542 pc1 = ivrc[0] - ivrc[0]*ivrc[1];
543 pc2 = ivrc[1];
544 }
545 /* Inverse filter LPBUF into IVBUF */
546 for (i = len - nsamp; i < len; i++)
547 ivbuf[i] = lpbuf[i] - pc1*lpbuf[i - 4] - pc2*lpbuf[i - 8];
548 }
549 /*- End of function --------------------------------------------------------*/
550
551 void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[])
552 {
553 static const int32_t tau[60] =
554 {
555 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
556 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
557 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96,
558 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
559 148, 152, 156
560 };
561 static const int32_t buflim[4] =
562 {
563 181, 720, 25, 720
564 };
565 static const float precoef = 0.9375f;
566
567 float amdf[60];
568 float abuf[156];
569 float ivrc[2];
570 float temp;
571 float phi[100] /* was [10][10] */;
572 float psi[10];
573 int32_t half;
574 int32_t midx;
575 int32_t ewin[3][2];
576 int32_t i;
577 int32_t j;
578 int32_t lanal;
579 int32_t ipitch;
580 int32_t mintau;
581 int32_t minptr;
582 int32_t maxptr;
583
584 /* Calculations are done on future frame due to requirements
585 of the pitch tracker. Delay RMS and RC's 2 frames to give
586 current frame parameters on return. */
587
588 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 181; i++)
589 {
590 s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i];
591 s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i];
592 }
593 for (i = 0; i <= 540 - LPC10_SAMPLES_PER_FRAME - 229; i++)
594 s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i];
595 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 25; i++)
596 s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i];
597 for (i = 0, j = 0; i < s->osptr - 1; i++)
598 {
599 if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME)
600 s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME;
601 }
602 s->osptr = j + 1;
603 s->voibuf[0][0] = s->voibuf[1][0];
604 s->voibuf[0][1] = s->voibuf[1][1];
605 for (i = 0; i < 2; i++)
606 {
607 s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
608 s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
609 s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
610 s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
611 s->obound[i] = s->obound[i + 1];
612 s->voibuf[i + 1][0] = s->voibuf[i + 2][0];
613 s->voibuf[i + 1][1] = s->voibuf[i + 2][1];
614 s->rmsbuf[i] = s->rmsbuf[i + 1];
615 for (j = 0; j < LPC10_ORDER; j++)
616 s->rcbuf[i][j] = s->rcbuf[i + 1][j];
617 }
618 /* If the average value in the frame was over 1/4096 (after current
619 BIAS correction), then subtract that much more from samples in the
620 next frame. If the average value in the frame was under
621 -1/4096, add 1/4096 more to samples in next frame. In all other
622 cases, keep BIAS the same. */
623 temp = 0.0f;
624 for (i = 0; i < LPC10_SAMPLES_PER_FRAME; i++)
625 {
626 s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias;
627 temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i];
628 }
629 if (temp > (float) LPC10_SAMPLES_PER_FRAME)
630 s->bias++;
631 else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME))
632 s->bias--;
633 /* Place voicing window */
634 i = 721 - LPC10_SAMPLES_PER_FRAME;
635 s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre);
636 onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME);
637
638 lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462);
639 /* The Pitch Extraction algorithm estimates the pitch for a frame
640 of speech by locating the minimum of the average magnitude difference
641 function (AMDF). The AMDF operates on low-pass, inverse filtered
642 speech. (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR
643 filter and the inverse filter is a 2nd-order LPC filter.) The pitch
644 estimate is later refined by dynamic tracking. However, since some
645 of the tracking parameters are a function of the voicing decisions,
646 a voicing decision must precede the final pitch estimation. */
647 /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */
648 /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720
649 of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH
650 = 720 of LPBUF. */
651 lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME);
652 /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540
653 of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through
654 PWINH = 540 of IVBUF. */
655 ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc);
656 /* eval_highres_amdf reads indices PWINL = 229 through
657 (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes
658 indices 1 through LTAU = 60 of AMDF. */
659 eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau);
660 /* Voicing decisions are made for each half frame of input speech.
661 An initial voicing classification is made for each half of the
662 analysis frame, and the voicing decisions for the present frame
663 are finalized. See subroutine VOICIN. */
664 /* The voicing detector (VOICIN) classifies the input signal as
665 unvoiced (including silence) or voiced using the AMDF windowed
666 maximum-to-minimum ratio, the zero crossing rate, energy measures,
667 reflection coefficients, and prediction gains. */
668 /* The pitch and voicing rules apply smoothing and isolated
669 corrections to the pitch and voicing estimates and, in the process,
670 introduce two frames of delay into the corrected pitch estimates and
671 voicing decisions. */
672 for (half = 0; half < 2; half++)
673 {
674 lpc10_voicing(s,
675 &s->vwin[2][0],
676 s->inbuf,
677 s->lpbuf,
678 buflim,
679 half,
680 &amdf[minptr],
681 &amdf[maxptr],
682 &mintau,
683 ivrc,
684 s->obound);
685 }
686 /* Find the minimum cost pitch decision over several frames,
687 given the current voicing decision and the AMDF array */
688 minptr++;
689 dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx);
690 ipitch = tau[midx - 1];
691 /* Place spectrum analysis and energy windows */
692 lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156);
693 /* Remove short term DC bias over the analysis window. */
694 lanal = s->awin[2][1] + 1 - s->awin[2][0];
695 remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf);
696 /* Compute RMS over integer number of pitch periods within the analysis window. */
697 /* Note that in a hardware implementation this computation may be
698 simplified by using diagonal elements of phi computed by mload(). */
699 s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1);
700 /* Matrix load and invert, check RC's for stability */
701 mload(LPC10_ORDER, 1, lanal, abuf, phi, psi);
702 invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]);
703 rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]);
704 /* Set return parameters */
705 voice[0] = s->voibuf[1][0];
706 voice[1] = s->voibuf[1][1];
707 *rms = s->rmsbuf[0];
708 for (i = 0; i < LPC10_ORDER; i++)
709 rc[i] = s->rcbuf[0][i];
710 }
711 /*- End of function --------------------------------------------------------*/
712 /*- End of file ------------------------------------------------------------*/

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.