comparison spandsp-0.0.6pre17/src/lpc10_analyse.c @ 4:26cd8f1ef0b1

import spandsp-0.0.6pre17
author Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date Fri, 25 Jun 2010 15:50:58 +0200
parents
children
comparison
equal deleted inserted replaced
3:c6c5a16ce2f2 4:26cd8f1ef0b1
1 /*
2 * SpanDSP - a series of DSP components for telephony
3 *
4 * lpc10_analyse.c - LPC10 low bit rate speech codec.
5 *
6 * Written by Steve Underwood <steveu@coppice.org>
7 *
8 * Copyright (C) 2006 Steve Underwood
9 *
10 * All rights reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 2.1,
14 * as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * This code is based on the U.S. Department of Defense reference
26 * implementation of the LPC-10 2400 bps Voice Coder. They do not
27 * exert copyright claims on their code, and it may be freely used.
28 *
29 * $Id: lpc10_analyse.c,v 1.22 2009/01/28 03:41:27 steveu Exp $
30 */
31
32 #if defined(HAVE_CONFIG_H)
33 #include "config.h"
34 #endif
35
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <inttypes.h>
39 #include <memory.h>
40 #if defined(HAVE_TGMATH_H)
41 #include <tgmath.h>
42 #endif
43 #if defined(HAVE_MATH_H)
44 #include <math.h>
45 #endif
46 #include "floating_fudge.h"
47
48 #include "spandsp/telephony.h"
49 #include "spandsp/dc_restore.h"
50 #include "spandsp/lpc10.h"
51 #include "spandsp/private/lpc10.h"
52
53 #include "lpc10_encdecs.h"
54
55 static __inline__ float energyf(float amp[], int len)
56 {
57 int i;
58 float rms;
59
60 rms = 0.0f;
61 for (i = 0; i < len; i++)
62 rms += amp[i]*amp[i];
63 rms = sqrtf(rms/len);
64 return rms;
65 }
66 /*- End of function --------------------------------------------------------*/
67
68 static void remove_dc_bias(float speech[], int len, float sigout[])
69 {
70 float bias;
71 int i;
72
73 bias = 0.0f;
74 for (i = 0; i < len; i++)
75 bias += speech[i];
76 bias /= len;
77 for (i = 0; i < len; i++)
78 sigout[i] = speech[i] - bias;
79 }
80 /*- End of function --------------------------------------------------------*/
81
82 static void eval_amdf(float speech[],
83 int32_t lpita,
84 const int32_t tau[],
85 int32_t ltau,
86 int32_t maxlag,
87 float amdf[],
88 int32_t *minptr,
89 int32_t *maxptr)
90 {
91 float sum;
92 int i;
93 int j;
94 int n1;
95 int n2;
96
97 *minptr = 0;
98 *maxptr = 0;
99 for (i = 0; i < ltau; i++)
100 {
101 n1 = (maxlag - tau[i])/2 + 1;
102 n2 = n1 + lpita - 1;
103 sum = 0.0f;
104 for (j = n1; j <= n2; j += 4)
105 sum += fabsf(speech[j - 1] - speech[j + tau[i] - 1]);
106 amdf[i] = sum;
107 if (amdf[i] < amdf[*minptr])
108 *minptr = i;
109 if (amdf[i] > amdf[*maxptr])
110 *maxptr = i;
111 }
112 }
113 /*- End of function --------------------------------------------------------*/
114
115 static void eval_highres_amdf(float speech[],
116 int32_t lpita,
117 const int32_t tau[],
118 int32_t ltau,
119 float amdf[],
120 int32_t *minptr,
121 int32_t *maxptr,
122 int32_t *mintau)
123 {
124 float amdf2[6];
125 int32_t tau2[6];
126 int32_t minp2;
127 int32_t ltau2;
128 int32_t maxp2;
129 int32_t minamd;
130 int i;
131 int i2;
132 int ptr;
133
134 /* Compute full AMDF using log spaced lags, find coarse minimum */
135 eval_amdf(speech, lpita, tau, ltau, tau[ltau - 1], amdf, minptr, maxptr);
136 *mintau = tau[*minptr];
137 minamd = (int32_t) amdf[*minptr];
138
139 /* Build table containing all lags within +/- 3 of the AMDF minimum,
140 excluding all that have already been computed */
141 ltau2 = 0;
142 ptr = *minptr - 2;
143 i2 = min(*mintau + 4, tau[ltau - 1]);
144 for (i = max(*mintau - 3, 41); i < i2; i++)
145 {
146 while (tau[ptr] < i)
147 ptr++;
148 if (tau[ptr] != i)
149 tau2[ltau2++] = i;
150 }
151 /* Compute AMDF of the new lags, if there are any, and choose one
152 if it is better than the coarse minimum */
153 if (ltau2 > 0)
154 {
155 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
156 if (amdf2[minp2] < (float) minamd)
157 {
158 *mintau = tau2[minp2];
159 minamd = (int32_t) amdf2[minp2];
160 }
161 }
162 /* Check one octave up, if there are any lags not yet computed */
163 if (*mintau >= 80)
164 {
165 i = *mintau/2;
166 if ((i & 1) == 0)
167 {
168 ltau2 = 2;
169 tau2[0] = i - 1;
170 tau2[1] = i + 1;
171 }
172 else
173 {
174 ltau2 = 1;
175 tau2[0] = i;
176 }
177 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
178 if (amdf2[minp2] < (float) minamd)
179 {
180 *mintau = tau2[minp2];
181 minamd = (int32_t) amdf2[minp2];
182 *minptr -= 20;
183 }
184 }
185 /* Force minimum of the AMDF array to the high resolution minimum */
186 amdf[*minptr] = (float) minamd;
187 /* Find maximum of AMDF within 1/2 octave of minimum */
188 *maxptr = max(*minptr - 5, 0);
189 i2 = min(*minptr + 6, ltau);
190 for (i = *maxptr; i < i2; i++)
191 {
192 if (amdf[i] > amdf[*maxptr])
193 *maxptr = i;
194 }
195 }
196 /*- End of function --------------------------------------------------------*/
197
198 static void dynamic_pitch_tracking(lpc10_encode_state_t *s,
199 float amdf[],
200 int32_t ltau,
201 int32_t *minptr,
202 int32_t voice,
203 int32_t *pitch,
204 int32_t *midx)
205 {
206 int32_t pbar;
207 float sbar;
208 int32_t path[2];
209 int32_t i;
210 int32_t j;
211 float alpha;
212 float minsc;
213 float maxsc;
214
215 /* Calculate the confidence factor ALPHA, used as a threshold slope in */
216 /* SEESAW. If unvoiced, set high slope so that every point in P array */
217 /*is marked as a potential pitch frequency. A scaled up version (ALPHAX )*/
218 /* is used to maintain arithmetic precision. */
219 if (voice == 1)
220 s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f;
221 else
222 s->alphax *= 0.984375f;
223 alpha = s->alphax/16;
224 if (voice == 0 && s->alphax < 128.0f)
225 alpha = 8.0f;
226 /* SEESAW: Construct a pitch pointer array and intermediate winner function */
227 /* Left to right pass: */
228 s->p[s->ipoint][0] = 1;
229 pbar = 1;
230 sbar = s->s[0];
231 for (i = 0; i < ltau; i++)
232 {
233 sbar += alpha;
234 if (sbar < s->s[i])
235 {
236 s->s[i] = sbar;
237 }
238 else
239 {
240 pbar = i + 1;
241 sbar = s->s[i];
242 }
243 s->p[s->ipoint][i] = pbar;
244 }
245 /* Right to left pass: */
246 sbar = s->s[pbar - 1];
247 for (i = pbar - 2; i >= 0; i--)
248 {
249 sbar += alpha;
250 if (sbar < s->s[i])
251 {
252 s->s[i] = sbar;
253 s->p[s->ipoint][i] = pbar;
254 }
255 else
256 {
257 pbar = s->p[s->ipoint][i];
258 i = pbar - 1;
259 sbar = s->s[i];
260 }
261 }
262 /* Update S using AMDF */
263 /* Find maximum, minimum, and location of minimum */
264 s->s[0] += amdf[0]/2;
265 minsc = s->s[0];
266 maxsc = minsc;
267 *midx = 1;
268 for (i = 1; i < ltau; i++)
269 {
270 s->s[i] += amdf[i]/2;
271 if (s->s[i] > maxsc)
272 maxsc = s->s[i];
273 if (s->s[i] < minsc)
274 {
275 *midx = i + 1;
276 minsc = s->s[i];
277 }
278 }
279 /* Subtract MINSC from S to prevent overflow */
280 for (i = 0; i < ltau; i++)
281 s->s[i] -= minsc;
282 maxsc -= minsc;
283 /* Use higher octave pitch if significant null there */
284 j = 0;
285 for (i = 20; i <= 40; i += 10)
286 {
287 if (*midx > i)
288 {
289 if (s->s[*midx - i - 1] < maxsc / 4)
290 j = i;
291 }
292 }
293 *midx -= j;
294 /* TRACE: look back two frames to find minimum cost pitch estimate */
295 *pitch = *midx;
296 for (i = 0, j = s->ipoint; i < 2; i++, j++)
297 {
298 *pitch = s->p[j & 1][*pitch - 1];
299 path[i] = *pitch;
300 }
301
302 /* The following statement subtracts one from IPOINT, mod DEPTH. I */
303 /* think the author chose to add DEPTH-1, instead of subtracting 1, */
304 /* because then it will work even if MOD doesn't work as desired on */
305 /* negative arguments. */
306 s->ipoint = (s->ipoint + 1) & 1;
307 }
308 /*- End of function --------------------------------------------------------*/
309
310 /* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */
311 static void onset(lpc10_encode_state_t *s,
312 float *pebuf,
313 int32_t osbuf[],
314 int32_t *osptr,
315 int32_t oslen,
316 int32_t sbufl,
317 int32_t sbufh,
318 int32_t lframe)
319 {
320 int32_t i;
321 float r1;
322 float l2sum2;
323
324 pebuf -= sbufl;
325
326 if (s->hyst)
327 s->lasti -= lframe;
328 for (i = sbufh - lframe + 1; i <= sbufh; i++)
329 {
330 /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */
331 s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f;
332 /* Computing 2nd power */
333 r1 = pebuf[i - 1];
334 s->d__ = (r1*r1 + s->d__*63.0f)/64.0f;
335 if (s->d__ != 0.0f)
336 {
337 if (fabsf(s->n) > s->d__)
338 s->fpc = r_sign(1.0f, s->n);
339 else
340 s->fpc = s->n/s->d__;
341 }
342 /* Filter FPC */
343 l2sum2 = s->l2buf[s->l2ptr1 - 1];
344 s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc;
345 s->l2buf[s->l2ptr2 - 1] = s->l2sum1;
346 s->l2buf[s->l2ptr1 - 1] = s->fpc;
347 s->l2ptr1 = (s->l2ptr1 & 0xF) + 1;
348 s->l2ptr2 = (s->l2ptr2 & 0xF) + 1;
349 if (fabsf(s->l2sum1 - l2sum2) > 1.7f)
350 {
351 if (!s->hyst)
352 {
353 /* Ignore if buffer full */
354 if (*osptr <= oslen)
355 {
356 osbuf[*osptr - 1] = i - 9;
357 (*osptr)++;
358 }
359 s->hyst = TRUE;
360 }
361 s->lasti = i;
362 /* After one onset detection, at least OSHYST sample times must go */
363 /* by before another is allowed to occur. */
364 }
365 else if (s->hyst && i - s->lasti >= 10)
366 {
367 s->hyst = FALSE;
368 }
369 }
370 }
371 /*- End of function --------------------------------------------------------*/
372
373 /* Load a covariance matrix. */
374 static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[])
375 {
376 int32_t start;
377 int i;
378 int r;
379
380 start = awins + order;
381 for (r = 1; r <= order; r++)
382 {
383 phi[r - 1] = 0.0f;
384 for (i = start; i <= awinf; i++)
385 phi[r - 1] += speech[i - 2]*speech[i - r - 1];
386 }
387
388 /* Load last element of vector PSI */
389 psi[order - 1] = 0.0f;
390 for (i = start - 1; i < awinf; i++)
391 psi[order - 1] += speech[i]*speech[i - order];
392 /* End correct to get additional columns of phi */
393 for (r = 1; r < order; r++)
394 {
395 for (i = 1; i <= r; i++)
396 {
397 phi[i*order + r] = phi[(i - 1)*order + r - 1]
398 - speech[awinf - (r + 1)]*speech[awinf - (i + 1)]
399 + speech[start - (r + 2)]*speech[start - (i + 2)];
400 }
401 }
402 /* End correct to get additional elements of PSI */
403 for (i = 0; i < order - 1; i++)
404 {
405 psi[i] = phi[i + 1]
406 - speech[start - 2]*speech[start - i - 3]
407 + speech[awinf - 1]*speech[awinf - i - 2];
408 }
409 }
410 /*- End of function --------------------------------------------------------*/
411
412 /* Preemphasize speech with a single-zero filter. */
413 /* (When coef = .9375, preemphasis is as in LPC43.) */
414 static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z)
415 {
416 float temp;
417 int i;
418
419 for (i = 0; i < nsamp; i++)
420 {
421 temp = inbuf[i] - coeff*z;
422 z = inbuf[i];
423 pebuf[i] = temp;
424 }
425 return z;
426 }
427 /*- End of function --------------------------------------------------------*/
428
429 /* Invert a covariance matrix using Choleski decomposition method. */
430 static void invert(int32_t order, float phi[], float psi[], float rc[])
431 {
432 float r1;
433 int32_t i;
434 int32_t j;
435 int32_t k;
436 float v[10][10];
437
438 for (j = 0; j < order; j++)
439 {
440 for (i = j; i < order; i++)
441 v[j][i] = phi[i + j*order];
442 for (k = 0; k < j; k++)
443 {
444 r1 = v[k][j]*v[k][k];
445 for (i = j; i <= order; i++)
446 v[j][i] -= v[k][i]*r1;
447 }
448 /* Compute intermediate results, which are similar to RC's */
449 if (fabsf(v[j][j]) < 1.0e-10f)
450 {
451 for (i = j; i < order; i++)
452 rc[i] = 0.0f;
453 return;
454 }
455 rc[j] = psi[j];
456 for (k = 0; k < j; k++)
457 rc[j] -= rc[k]*v[k][j];
458 v[j][j] = 1.0f/v[j][j];
459 rc[j] *= v[j][j];
460 r1 = min(rc[j], 0.999f);
461 rc[j] = max(r1, -0.999f);
462 }
463 }
464 /*- End of function --------------------------------------------------------*/
465
466 /* Check RC's, repeat previous frame's RC's if unstable */
467 static int rcchk(int order, float rc1f[], float rc2f[])
468 {
469 int i;
470
471 for (i = 0; i < order; i++)
472 {
473 if (fabsf(rc2f[i]) > 0.99f)
474 {
475 for (i = 0; i < order; i++)
476 rc2f[i] = rc1f[i];
477 break;
478 }
479 }
480 return 0;
481 }
482 /*- End of function --------------------------------------------------------*/
483
484 static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp)
485 {
486 int32_t j;
487 float t;
488
489 /* 31 point equiripple FIR LPF */
490 /* Linear phase, delay = 15 samples */
491 /* Passband: ripple = 0.25 dB, cutoff = 800 Hz */
492 /* Stopband: atten. = 40. dB, cutoff = 1240 Hz */
493
494 for (j = len - nsamp; j < len; j++)
495 {
496 t = (inbuf[j] + inbuf[j - 30]) * -0.0097201988f;
497 t += (inbuf[j - 1] + inbuf[j - 29]) * -0.0105179986f;
498 t += (inbuf[j - 2] + inbuf[j - 28]) * -0.0083479648f;
499 t += (inbuf[j - 3] + inbuf[j - 27]) * 5.860774e-4f;
500 t += (inbuf[j - 4] + inbuf[j - 26]) * 0.0130892089f;
501 t += (inbuf[j - 5] + inbuf[j - 25]) * 0.0217052232f;
502 t += (inbuf[j - 6] + inbuf[j - 24]) * 0.0184161253f;
503 t += (inbuf[j - 7] + inbuf[j - 23]) * 3.39723e-4f;
504 t += (inbuf[j - 8] + inbuf[j - 22]) * -0.0260797087f;
505 t += (inbuf[j - 9] + inbuf[j - 21]) * -0.0455563702f;
506 t += (inbuf[j - 10] + inbuf[j - 20]) * -0.040306855f;
507 t += (inbuf[j - 11] + inbuf[j - 19]) * 5.029835e-4f;
508 t += (inbuf[j - 12] + inbuf[j - 18]) * 0.0729262903f;
509 t += (inbuf[j - 13] + inbuf[j - 17]) * 0.1572008878f;
510 t += (inbuf[j - 14] + inbuf[j - 16]) * 0.2247288674f;
511 t += inbuf[j - 15] * 0.250535965f;
512 lpbuf[j] = t;
513 }
514 }
515 /*- End of function --------------------------------------------------------*/
516
517 /* 2nd order inverse filter, speech is decimated 4:1 */
518 static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[])
519 {
520 int32_t i;
521 int32_t j;
522 int32_t k;
523 float r[3];
524 float pc1;
525 float pc2;
526
527 /* Calculate autocorrelations */
528 for (i = 1; i <= 3; i++)
529 {
530 r[i - 1] = 0.0f;
531 k = (i - 1) << 2;
532 for (j = (i << 2) + len - nsamp; j <= len; j += 2)
533 r[i - 1] += lpbuf[j - 1]*lpbuf[j - k - 1];
534 }
535 /* Calculate predictor coefficients */
536 pc1 = 0.0f;
537 pc2 = 0.0f;
538 ivrc[0] = 0.0f;
539 ivrc[1] = 0.0f;
540 if (r[0] > 1.0e-10f)
541 {
542 ivrc[0] = r[1]/r[0];
543 ivrc[1] = (r[2] - ivrc[0]*r[1])/(r[0] - ivrc[0]*r[1]);
544 pc1 = ivrc[0] - ivrc[0]*ivrc[1];
545 pc2 = ivrc[1];
546 }
547 /* Inverse filter LPBUF into IVBUF */
548 for (i = len - nsamp; i < len; i++)
549 ivbuf[i] = lpbuf[i] - pc1*lpbuf[i - 4] - pc2*lpbuf[i - 8];
550 }
551 /*- End of function --------------------------------------------------------*/
552
553 void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[])
554 {
555 static const int32_t tau[60] =
556 {
557 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
558 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
559 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96,
560 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
561 148, 152, 156
562 };
563 static const int32_t buflim[4] =
564 {
565 181, 720, 25, 720
566 };
567 static const float precoef = 0.9375f;
568
569 float amdf[60];
570 float abuf[156];
571 float ivrc[2];
572 float temp;
573 float phi[100] /* was [10][10] */;
574 float psi[10];
575 int32_t half;
576 int32_t midx;
577 int32_t ewin[3][2];
578 int32_t i;
579 int32_t j;
580 int32_t lanal;
581 int32_t ipitch;
582 int32_t mintau;
583 int32_t minptr;
584 int32_t maxptr;
585
586 /* Calculations are done on future frame due to requirements
587 of the pitch tracker. Delay RMS and RC's 2 frames to give
588 current frame parameters on return. */
589
590 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 181; i++)
591 {
592 s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i];
593 s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i];
594 }
595 for (i = 0; i <= 540 - LPC10_SAMPLES_PER_FRAME - 229; i++)
596 s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i];
597 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 25; i++)
598 s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i];
599 for (i = 0, j = 0; i < s->osptr - 1; i++)
600 {
601 if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME)
602 s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME;
603 }
604 s->osptr = j + 1;
605 s->voibuf[0][0] = s->voibuf[1][0];
606 s->voibuf[0][1] = s->voibuf[1][1];
607 for (i = 0; i < 2; i++)
608 {
609 s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
610 s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
611 s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
612 s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
613 s->obound[i] = s->obound[i + 1];
614 s->voibuf[i + 1][0] = s->voibuf[i + 2][0];
615 s->voibuf[i + 1][1] = s->voibuf[i + 2][1];
616 s->rmsbuf[i] = s->rmsbuf[i + 1];
617 for (j = 0; j < LPC10_ORDER; j++)
618 s->rcbuf[i][j] = s->rcbuf[i + 1][j];
619 }
620 /* If the average value in the frame was over 1/4096 (after current
621 BIAS correction), then subtract that much more from samples in the
622 next frame. If the average value in the frame was under
623 -1/4096, add 1/4096 more to samples in next frame. In all other
624 cases, keep BIAS the same. */
625 temp = 0.0f;
626 for (i = 0; i < LPC10_SAMPLES_PER_FRAME; i++)
627 {
628 s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias;
629 temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i];
630 }
631 if (temp > (float) LPC10_SAMPLES_PER_FRAME)
632 s->bias++;
633 else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME))
634 s->bias--;
635 /* Place voicing window */
636 i = 721 - LPC10_SAMPLES_PER_FRAME;
637 s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre);
638 onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME);
639
640 lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462);
641 /* The Pitch Extraction algorithm estimates the pitch for a frame
642 of speech by locating the minimum of the average magnitude difference
643 function (AMDF). The AMDF operates on low-pass, inverse filtered
644 speech. (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR
645 filter and the inverse filter is a 2nd-order LPC filter.) The pitch
646 estimate is later refined by dynamic tracking. However, since some
647 of the tracking parameters are a function of the voicing decisions,
648 a voicing decision must precede the final pitch estimation. */
649 /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */
650 /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720
651 of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH
652 = 720 of LPBUF. */
653 lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME);
654 /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540
655 of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through
656 PWINH = 540 of IVBUF. */
657 ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc);
658 /* eval_highres_amdf reads indices PWINL = 229 through
659 (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes
660 indices 1 through LTAU = 60 of AMDF. */
661 eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau);
662 /* Voicing decisions are made for each half frame of input speech.
663 An initial voicing classification is made for each half of the
664 analysis frame, and the voicing decisions for the present frame
665 are finalized. See subroutine VOICIN. */
666 /* The voicing detector (VOICIN) classifies the input signal as
667 unvoiced (including silence) or voiced using the AMDF windowed
668 maximum-to-minimum ratio, the zero crossing rate, energy measures,
669 reflection coefficients, and prediction gains. */
670 /* The pitch and voicing rules apply smoothing and isolated
671 corrections to the pitch and voicing estimates and, in the process,
672 introduce two frames of delay into the corrected pitch estimates and
673 voicing decisions. */
674 for (half = 0; half < 2; half++)
675 {
676 lpc10_voicing(s,
677 &s->vwin[2][0],
678 s->inbuf,
679 s->lpbuf,
680 buflim,
681 half,
682 &amdf[minptr],
683 &amdf[maxptr],
684 &mintau,
685 ivrc,
686 s->obound);
687 }
688 /* Find the minimum cost pitch decision over several frames,
689 given the current voicing decision and the AMDF array */
690 minptr++;
691 dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx);
692 ipitch = tau[midx - 1];
693 /* Place spectrum analysis and energy windows */
694 lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156);
695 /* Remove short term DC bias over the analysis window. */
696 lanal = s->awin[2][1] + 1 - s->awin[2][0];
697 remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf);
698 /* Compute RMS over integer number of pitch periods within the analysis window. */
699 /* Note that in a hardware implementation this computation may be
700 simplified by using diagonal elements of phi computed by mload(). */
701 s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1);
702 /* Matrix load and invert, check RC's for stability */
703 mload(LPC10_ORDER, 1, lanal, abuf, phi, psi);
704 invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]);
705 rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]);
706 /* Set return parameters */
707 voice[0] = s->voibuf[1][0];
708 voice[1] = s->voibuf[1][1];
709 *rms = s->rmsbuf[0];
710 for (i = 0; i < LPC10_ORDER; i++)
711 rc[i] = s->rcbuf[0][i];
712 }
713 /*- End of function --------------------------------------------------------*/
714 /*- End of file ------------------------------------------------------------*/

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.