5
|
1 /*
|
|
2 * SpanDSP - a series of DSP components for telephony
|
|
3 *
|
|
4 * lpc10_analyse.c - LPC10 low bit rate speech codec.
|
|
5 *
|
|
6 * Written by Steve Underwood <steveu@coppice.org>
|
|
7 *
|
|
8 * Copyright (C) 2006 Steve Underwood
|
|
9 *
|
|
10 * All rights reserved.
|
|
11 *
|
|
12 * This program is free software; you can redistribute it and/or modify
|
|
13 * it under the terms of the GNU General Public License version 2, as
|
|
14 * published by the Free Software Foundation.
|
|
15 *
|
|
16 * This program is distributed in the hope that it will be useful,
|
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 * GNU General Public License for more details.
|
|
20 *
|
|
21 * You should have received a copy of the GNU General Public License
|
|
22 * along with this program; if not, write to the Free Software
|
|
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
24 *
|
|
25 * This code is based on the U.S. Department of Defense reference
|
|
26 * implementation of the LPC-10 2400 bps Voice Coder. They do not
|
|
27 * exert copyright claims on their code, and it may be freely used.
|
|
28 *
|
|
29 * $Id: lpc10_analyse.c,v 1.12 2006/11/30 15:41:47 steveu Exp $
|
|
30 */
|
|
31
|
|
32 #ifdef HAVE_CONFIG_H
|
|
33 #include <config.h>
|
|
34 #endif
|
|
35
|
|
36 #include <stdlib.h>
|
|
37 #include <stdio.h>
|
|
38 #include <inttypes.h>
|
|
39 #include <memory.h>
|
|
40 #if defined(HAVE_TGMATH_H)
|
|
41 #include <tgmath.h>
|
|
42 #endif
|
|
43 #if defined(HAVE_MATH_H)
|
|
44 #include <math.h>
|
|
45 #endif
|
|
46
|
|
47 #include "spandsp/telephony.h"
|
|
48 #include "spandsp/dc_restore.h"
|
|
49 #include "spandsp/lpc10.h"
|
|
50
|
|
51 #include "lpc10_encdecs.h"
|
|
52
|
|
53 static __inline__ float energyf(float amp[], int len)
|
|
54 {
|
|
55 int i;
|
|
56 float rms;
|
|
57
|
|
58 rms = 0.0f;
|
|
59 for (i = 0; i < len; i++)
|
|
60 rms += amp[i]*amp[i];
|
|
61 rms = sqrtf(rms/len);
|
|
62 return rms;
|
|
63 }
|
|
64 /*- End of function --------------------------------------------------------*/
|
|
65
|
|
66 static void remove_dc_bias(float speech[], int len, float sigout[])
|
|
67 {
|
|
68 float bias;
|
|
69 int i;
|
|
70
|
|
71 bias = 0.0f;
|
|
72 for (i = 0; i < len; i++)
|
|
73 bias += speech[i];
|
|
74 bias /= len;
|
|
75 for (i = 0; i < len; i++)
|
|
76 sigout[i] = speech[i] - bias;
|
|
77 }
|
|
78 /*- End of function --------------------------------------------------------*/
|
|
79
|
|
80 static void eval_amdf(float speech[],
|
|
81 int32_t lpita,
|
|
82 const int32_t tau[],
|
|
83 int32_t ltau,
|
|
84 int32_t maxlag,
|
|
85 float amdf[],
|
|
86 int32_t *minptr,
|
|
87 int32_t *maxptr)
|
|
88 {
|
|
89 float sum;
|
|
90 int i;
|
|
91 int j;
|
|
92 int n1;
|
|
93 int n2;
|
|
94
|
|
95 *minptr = 0;
|
|
96 *maxptr = 0;
|
|
97 for (i = 0; i < ltau; i++)
|
|
98 {
|
|
99 n1 = (maxlag - tau[i])/2 + 1;
|
|
100 n2 = n1 + lpita - 1;
|
|
101 sum = 0.0f;
|
|
102 for (j = n1; j <= n2; j += 4)
|
|
103 sum += fabsf(speech[j - 1] - speech[j + tau[i] - 1]);
|
|
104 amdf[i] = sum;
|
|
105 if (amdf[i] < amdf[*minptr])
|
|
106 *minptr = i;
|
|
107 if (amdf[i] > amdf[*maxptr])
|
|
108 *maxptr = i;
|
|
109 }
|
|
110 }
|
|
111 /*- End of function --------------------------------------------------------*/
|
|
112
|
|
113 static void eval_highres_amdf(float speech[],
|
|
114 int32_t lpita,
|
|
115 const int32_t tau[],
|
|
116 int32_t ltau,
|
|
117 float amdf[],
|
|
118 int32_t *minptr,
|
|
119 int32_t *maxptr,
|
|
120 int32_t *mintau)
|
|
121 {
|
|
122 float amdf2[6];
|
|
123 int32_t tau2[6];
|
|
124 int32_t minp2;
|
|
125 int32_t ltau2;
|
|
126 int32_t maxp2;
|
|
127 int32_t minamd;
|
|
128 int i;
|
|
129 int i2;
|
|
130 int ptr;
|
|
131
|
|
132 /* Compute full AMDF using log spaced lags, find coarse minimum */
|
|
133 eval_amdf(speech, lpita, tau, ltau, tau[ltau - 1], amdf, minptr, maxptr);
|
|
134 *mintau = tau[*minptr];
|
|
135 minamd = (int32_t) amdf[*minptr];
|
|
136
|
|
137 /* Build table containing all lags within +/- 3 of the AMDF minimum,
|
|
138 excluding all that have already been computed */
|
|
139 ltau2 = 0;
|
|
140 ptr = *minptr - 2;
|
|
141 i2 = min(*mintau + 4, tau[ltau - 1]);
|
|
142 for (i = max(*mintau - 3, 41); i < i2; i++)
|
|
143 {
|
|
144 while (tau[ptr] < i)
|
|
145 ptr++;
|
|
146 if (tau[ptr] != i)
|
|
147 tau2[ltau2++] = i;
|
|
148 }
|
|
149 /* Compute AMDF of the new lags, if there are any, and choose one
|
|
150 if it is better than the coarse minimum */
|
|
151 if (ltau2 > 0)
|
|
152 {
|
|
153 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
|
|
154 if (amdf2[minp2] < (float) minamd)
|
|
155 {
|
|
156 *mintau = tau2[minp2];
|
|
157 minamd = (int32_t) amdf2[minp2];
|
|
158 }
|
|
159 }
|
|
160 /* Check one octave up, if there are any lags not yet computed */
|
|
161 if (*mintau >= 80)
|
|
162 {
|
|
163 i = *mintau/2;
|
|
164 if ((i & 1) == 0)
|
|
165 {
|
|
166 ltau2 = 2;
|
|
167 tau2[0] = i - 1;
|
|
168 tau2[1] = i + 1;
|
|
169 }
|
|
170 else
|
|
171 {
|
|
172 ltau2 = 1;
|
|
173 tau2[0] = i;
|
|
174 }
|
|
175 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2);
|
|
176 if (amdf2[minp2] < (float) minamd)
|
|
177 {
|
|
178 *mintau = tau2[minp2];
|
|
179 minamd = (int32_t) amdf2[minp2];
|
|
180 *minptr -= 20;
|
|
181 }
|
|
182 }
|
|
183 /* Force minimum of the AMDF array to the high resolution minimum */
|
|
184 amdf[*minptr] = (float) minamd;
|
|
185 /* Find maximum of AMDF within 1/2 octave of minimum */
|
|
186 *maxptr = max(*minptr - 5, 0);
|
|
187 i2 = min(*minptr + 6, ltau);
|
|
188 for (i = *maxptr; i < i2; i++)
|
|
189 {
|
|
190 if (amdf[i] > amdf[*maxptr])
|
|
191 *maxptr = i;
|
|
192 }
|
|
193 }
|
|
194 /*- End of function --------------------------------------------------------*/
|
|
195
|
|
196 static void dynamic_pitch_tracking(lpc10_encode_state_t *s,
|
|
197 float amdf[],
|
|
198 int32_t ltau,
|
|
199 int32_t *minptr,
|
|
200 int32_t voice,
|
|
201 int32_t *pitch,
|
|
202 int32_t *midx)
|
|
203 {
|
|
204 int32_t pbar;
|
|
205 float sbar;
|
|
206 int32_t path[2];
|
|
207 int32_t i;
|
|
208 int32_t j;
|
|
209 float alpha;
|
|
210 float minsc;
|
|
211 float maxsc;
|
|
212
|
|
213 /* Calculate the confidence factor ALPHA, used as a threshold slope in */
|
|
214 /* SEESAW. If unvoiced, set high slope so that every point in P array */
|
|
215 /*is marked as a potential pitch frequency. A scaled up version (ALPHAX )*/
|
|
216 /* is used to maintain arithmetic precision. */
|
|
217 if (voice == 1)
|
|
218 s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f;
|
|
219 else
|
|
220 s->alphax *= 0.984375f;
|
|
221 alpha = s->alphax/16;
|
|
222 if (voice == 0 && s->alphax < 128.0f)
|
|
223 alpha = 8.0f;
|
|
224 /* SEESAW: Construct a pitch pointer array and intermediate winner function */
|
|
225 /* Left to right pass: */
|
|
226 s->p[s->ipoint][0] = 1;
|
|
227 pbar = 1;
|
|
228 sbar = s->s[0];
|
|
229 for (i = 0; i < ltau; i++)
|
|
230 {
|
|
231 sbar += alpha;
|
|
232 if (sbar < s->s[i])
|
|
233 {
|
|
234 s->s[i] = sbar;
|
|
235 }
|
|
236 else
|
|
237 {
|
|
238 pbar = i + 1;
|
|
239 sbar = s->s[i];
|
|
240 }
|
|
241 s->p[s->ipoint][i] = pbar;
|
|
242 }
|
|
243 /* Right to left pass: */
|
|
244 sbar = s->s[pbar - 1];
|
|
245 for (i = pbar - 2; i >= 0; i--)
|
|
246 {
|
|
247 sbar += alpha;
|
|
248 if (sbar < s->s[i])
|
|
249 {
|
|
250 s->s[i] = sbar;
|
|
251 s->p[s->ipoint][i] = pbar;
|
|
252 }
|
|
253 else
|
|
254 {
|
|
255 pbar = s->p[s->ipoint][i];
|
|
256 i = pbar - 1;
|
|
257 sbar = s->s[i];
|
|
258 }
|
|
259 }
|
|
260 /* Update S using AMDF */
|
|
261 /* Find maximum, minimum, and location of minimum */
|
|
262 s->s[0] += amdf[0]/2;
|
|
263 minsc = s->s[0];
|
|
264 maxsc = minsc;
|
|
265 *midx = 1;
|
|
266 for (i = 1; i < ltau; i++)
|
|
267 {
|
|
268 s->s[i] += amdf[i]/2;
|
|
269 if (s->s[i] > maxsc)
|
|
270 maxsc = s->s[i];
|
|
271 if (s->s[i] < minsc)
|
|
272 {
|
|
273 *midx = i + 1;
|
|
274 minsc = s->s[i];
|
|
275 }
|
|
276 }
|
|
277 /* Subtract MINSC from S to prevent overflow */
|
|
278 for (i = 0; i < ltau; i++)
|
|
279 s->s[i] -= minsc;
|
|
280 maxsc -= minsc;
|
|
281 /* Use higher octave pitch if significant null there */
|
|
282 j = 0;
|
|
283 for (i = 20; i <= 40; i += 10)
|
|
284 {
|
|
285 if (*midx > i)
|
|
286 {
|
|
287 if (s->s[*midx - i - 1] < maxsc / 4)
|
|
288 j = i;
|
|
289 }
|
|
290 }
|
|
291 *midx -= j;
|
|
292 /* TRACE: look back two frames to find minimum cost pitch estimate */
|
|
293 *pitch = *midx;
|
|
294 for (i = 0, j = s->ipoint; i < 2; i++, j++)
|
|
295 {
|
|
296 *pitch = s->p[j & 1][*pitch - 1];
|
|
297 path[i] = *pitch;
|
|
298 }
|
|
299
|
|
300 /* The following statement subtracts one from IPOINT, mod DEPTH. I */
|
|
301 /* think the author chose to add DEPTH-1, instead of subtracting 1, */
|
|
302 /* because then it will work even if MOD doesn't work as desired on */
|
|
303 /* negative arguments. */
|
|
304 s->ipoint = (s->ipoint + 1) & 1;
|
|
305 }
|
|
306 /*- End of function --------------------------------------------------------*/
|
|
307
|
|
308 /* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */
|
|
309 static void onset(lpc10_encode_state_t *s,
|
|
310 float *pebuf,
|
|
311 int32_t osbuf[],
|
|
312 int32_t *osptr,
|
|
313 int32_t oslen,
|
|
314 int32_t sbufl,
|
|
315 int32_t sbufh,
|
|
316 int32_t lframe)
|
|
317 {
|
|
318 int32_t i;
|
|
319 float r1;
|
|
320 float l2sum2;
|
|
321
|
|
322 pebuf -= sbufl;
|
|
323
|
|
324 if (s->hyst)
|
|
325 s->lasti -= lframe;
|
|
326 for (i = sbufh - lframe + 1; i <= sbufh; i++)
|
|
327 {
|
|
328 /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */
|
|
329 s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f;
|
|
330 /* Computing 2nd power */
|
|
331 r1 = pebuf[i - 1];
|
|
332 s->d__ = (r1*r1 + s->d__*63.0f)/64.0f;
|
|
333 if (s->d__ != 0.0f)
|
|
334 {
|
|
335 if (fabsf(s->n) > s->d__)
|
|
336 s->fpc = r_sign(1.0f, s->n);
|
|
337 else
|
|
338 s->fpc = s->n/s->d__;
|
|
339 }
|
|
340 /* Filter FPC */
|
|
341 l2sum2 = s->l2buf[s->l2ptr1 - 1];
|
|
342 s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc;
|
|
343 s->l2buf[s->l2ptr2 - 1] = s->l2sum1;
|
|
344 s->l2buf[s->l2ptr1 - 1] = s->fpc;
|
|
345 s->l2ptr1 = (s->l2ptr1 & 0xF) + 1;
|
|
346 s->l2ptr2 = (s->l2ptr2 & 0xF) + 1;
|
|
347 if (fabsf(s->l2sum1 - l2sum2) > 1.7f)
|
|
348 {
|
|
349 if (!s->hyst)
|
|
350 {
|
|
351 /* Ignore if buffer full */
|
|
352 if (*osptr <= oslen)
|
|
353 {
|
|
354 osbuf[*osptr - 1] = i - 9;
|
|
355 (*osptr)++;
|
|
356 }
|
|
357 s->hyst = TRUE;
|
|
358 }
|
|
359 s->lasti = i;
|
|
360 /* After one onset detection, at least OSHYST sample times must go */
|
|
361 /* by before another is allowed to occur. */
|
|
362 }
|
|
363 else if (s->hyst && i - s->lasti >= 10)
|
|
364 {
|
|
365 s->hyst = FALSE;
|
|
366 }
|
|
367 }
|
|
368 }
|
|
369 /*- End of function --------------------------------------------------------*/
|
|
370
|
|
371 /* Load a covariance matrix. */
|
|
372 static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[])
|
|
373 {
|
|
374 int32_t start;
|
|
375 int i;
|
|
376 int r;
|
|
377
|
|
378 start = awins + order;
|
|
379 for (r = 1; r <= order; r++)
|
|
380 {
|
|
381 phi[r - 1] = 0.0f;
|
|
382 for (i = start; i <= awinf; i++)
|
|
383 phi[r - 1] += speech[i - 2]*speech[i - r - 1];
|
|
384 }
|
|
385
|
|
386 /* Load last element of vector PSI */
|
|
387 psi[order - 1] = 0.0f;
|
|
388 for (i = start - 1; i < awinf; i++)
|
|
389 psi[order - 1] += speech[i]*speech[i - order];
|
|
390 /* End correct to get additional columns of phi */
|
|
391 for (r = 1; r < order; r++)
|
|
392 {
|
|
393 for (i = 1; i <= r; i++)
|
|
394 {
|
|
395 phi[i*order + r] = phi[(i - 1)*order + r - 1]
|
|
396 - speech[awinf - (r + 1)]*speech[awinf - (i + 1)]
|
|
397 + speech[start - (r + 2)]*speech[start - (i + 2)];
|
|
398 }
|
|
399 }
|
|
400 /* End correct to get additional elements of PSI */
|
|
401 for (i = 0; i < order - 1; i++)
|
|
402 {
|
|
403 psi[i] = phi[i + 1]
|
|
404 - speech[start - 2]*speech[start - i - 3]
|
|
405 + speech[awinf - 1]*speech[awinf - i - 2];
|
|
406 }
|
|
407 }
|
|
408 /*- End of function --------------------------------------------------------*/
|
|
409
|
|
410 /* Preemphasize speech with a single-zero filter. */
|
|
411 /* (When coef = .9375, preemphasis is as in LPC43.) */
|
|
412 static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z)
|
|
413 {
|
|
414 float temp;
|
|
415 int i;
|
|
416
|
|
417 for (i = 0; i < nsamp; i++)
|
|
418 {
|
|
419 temp = inbuf[i] - coeff*z;
|
|
420 z = inbuf[i];
|
|
421 pebuf[i] = temp;
|
|
422 }
|
|
423 return z;
|
|
424 }
|
|
425 /*- End of function --------------------------------------------------------*/
|
|
426
|
|
427 /* Invert a covariance matrix using Choleski decomposition method. */
|
|
428 static void invert(int32_t order, float phi[], float psi[], float rc[])
|
|
429 {
|
|
430 float r1;
|
|
431 int32_t i;
|
|
432 int32_t j;
|
|
433 int32_t k;
|
|
434 float v[10][10];
|
|
435
|
|
436 for (j = 0; j < order; j++)
|
|
437 {
|
|
438 for (i = j; i < order; i++)
|
|
439 v[j][i] = phi[i + j*order];
|
|
440 for (k = 0; k < j; k++)
|
|
441 {
|
|
442 r1 = v[k][j]*v[k][k];
|
|
443 for (i = j; i <= order; i++)
|
|
444 v[j][i] -= v[k][i]*r1;
|
|
445 }
|
|
446 /* Compute intermediate results, which are similar to RC's */
|
|
447 if (fabsf(v[j][j]) < 1.0e-10f)
|
|
448 {
|
|
449 for (i = j; i < order; i++)
|
|
450 rc[i] = 0.0f;
|
|
451 return;
|
|
452 }
|
|
453 rc[j] = psi[j];
|
|
454 for (k = 0; k < j; k++)
|
|
455 rc[j] -= rc[k]*v[k][j];
|
|
456 v[j][j] = 1.0f/v[j][j];
|
|
457 rc[j] *= v[j][j];
|
|
458 r1 = min(rc[j], 0.999f);
|
|
459 rc[j] = max(r1, -0.999f);
|
|
460 }
|
|
461 }
|
|
462 /*- End of function --------------------------------------------------------*/
|
|
463
|
|
464 /* Check RC's, repeat previous frame's RC's if unstable */
|
|
465 static int rcchk(int order, float rc1f[], float rc2f[])
|
|
466 {
|
|
467 int i;
|
|
468
|
|
469 for (i = 0; i < order; i++)
|
|
470 {
|
|
471 if (fabsf(rc2f[i]) > 0.99f)
|
|
472 {
|
|
473 for (i = 0; i < order; i++)
|
|
474 rc2f[i] = rc1f[i];
|
|
475 break;
|
|
476 }
|
|
477 }
|
|
478 return 0;
|
|
479 }
|
|
480 /*- End of function --------------------------------------------------------*/
|
|
481
|
|
482 static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp)
|
|
483 {
|
|
484 int32_t j;
|
|
485 float t;
|
|
486
|
|
487 /* 31 point equiripple FIR LPF */
|
|
488 /* Linear phase, delay = 15 samples */
|
|
489 /* Passband: ripple = 0.25 dB, cutoff = 800 Hz */
|
|
490 /* Stopband: atten. = 40. dB, cutoff = 1240 Hz */
|
|
491
|
|
492 for (j = len - nsamp; j < len; j++)
|
|
493 {
|
|
494 t = (inbuf[j] + inbuf[j - 30]) * -0.0097201988f;
|
|
495 t += (inbuf[j - 1] + inbuf[j - 29]) * -0.0105179986f;
|
|
496 t += (inbuf[j - 2] + inbuf[j - 28]) * -0.0083479648f;
|
|
497 t += (inbuf[j - 3] + inbuf[j - 27]) * 5.860774e-4f;
|
|
498 t += (inbuf[j - 4] + inbuf[j - 26]) * 0.0130892089f;
|
|
499 t += (inbuf[j - 5] + inbuf[j - 25]) * 0.0217052232f;
|
|
500 t += (inbuf[j - 6] + inbuf[j - 24]) * 0.0184161253f;
|
|
501 t += (inbuf[j - 7] + inbuf[j - 23]) * 3.39723e-4f;
|
|
502 t += (inbuf[j - 8] + inbuf[j - 22]) * -0.0260797087f;
|
|
503 t += (inbuf[j - 9] + inbuf[j - 21]) * -0.0455563702f;
|
|
504 t += (inbuf[j - 10] + inbuf[j - 20]) * -0.040306855f;
|
|
505 t += (inbuf[j - 11] + inbuf[j - 19]) * 5.029835e-4f;
|
|
506 t += (inbuf[j - 12] + inbuf[j - 18]) * 0.0729262903f;
|
|
507 t += (inbuf[j - 13] + inbuf[j - 17]) * 0.1572008878f;
|
|
508 t += (inbuf[j - 14] + inbuf[j - 16]) * 0.2247288674f;
|
|
509 t += inbuf[j - 15] * 0.250535965f;
|
|
510 lpbuf[j] = t;
|
|
511 }
|
|
512 }
|
|
513 /*- End of function --------------------------------------------------------*/
|
|
514
|
|
515 /* 2nd order inverse filter, speech is decimated 4:1 */
|
|
516 static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[])
|
|
517 {
|
|
518 int32_t i;
|
|
519 int32_t j;
|
|
520 int32_t k;
|
|
521 float r[3];
|
|
522 float pc1;
|
|
523 float pc2;
|
|
524
|
|
525 /* Calculate autocorrelations */
|
|
526 for (i = 1; i <= 3; i++)
|
|
527 {
|
|
528 r[i - 1] = 0.0f;
|
|
529 k = (i - 1) << 2;
|
|
530 for (j = (i << 2) + len - nsamp; j <= len; j += 2)
|
|
531 r[i - 1] += lpbuf[j - 1]*lpbuf[j - k - 1];
|
|
532 }
|
|
533 /* Calculate predictor coefficients */
|
|
534 pc1 = 0.0f;
|
|
535 pc2 = 0.0f;
|
|
536 ivrc[0] = 0.0f;
|
|
537 ivrc[1] = 0.0f;
|
|
538 if (r[0] > 1.0e-10f)
|
|
539 {
|
|
540 ivrc[0] = r[1]/r[0];
|
|
541 ivrc[1] = (r[2] - ivrc[0]*r[1])/(r[0] - ivrc[0]*r[1]);
|
|
542 pc1 = ivrc[0] - ivrc[0]*ivrc[1];
|
|
543 pc2 = ivrc[1];
|
|
544 }
|
|
545 /* Inverse filter LPBUF into IVBUF */
|
|
546 for (i = len - nsamp; i < len; i++)
|
|
547 ivbuf[i] = lpbuf[i] - pc1*lpbuf[i - 4] - pc2*lpbuf[i - 8];
|
|
548 }
|
|
549 /*- End of function --------------------------------------------------------*/
|
|
550
|
|
551 void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[])
|
|
552 {
|
|
553 static const int32_t tau[60] =
|
|
554 {
|
|
555 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
|
|
556 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58,
|
|
557 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96,
|
|
558 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144,
|
|
559 148, 152, 156
|
|
560 };
|
|
561 static const int32_t buflim[4] =
|
|
562 {
|
|
563 181, 720, 25, 720
|
|
564 };
|
|
565 static const float precoef = 0.9375f;
|
|
566
|
|
567 float amdf[60];
|
|
568 float abuf[156];
|
|
569 float ivrc[2];
|
|
570 float temp;
|
|
571 float phi[100] /* was [10][10] */;
|
|
572 float psi[10];
|
|
573 int32_t half;
|
|
574 int32_t midx;
|
|
575 int32_t ewin[3][2];
|
|
576 int32_t i;
|
|
577 int32_t j;
|
|
578 int32_t lanal;
|
|
579 int32_t ipitch;
|
|
580 int32_t mintau;
|
|
581 int32_t minptr;
|
|
582 int32_t maxptr;
|
|
583
|
|
584 /* Calculations are done on future frame due to requirements
|
|
585 of the pitch tracker. Delay RMS and RC's 2 frames to give
|
|
586 current frame parameters on return. */
|
|
587
|
|
588 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 181; i++)
|
|
589 {
|
|
590 s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i];
|
|
591 s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i];
|
|
592 }
|
|
593 for (i = 0; i <= 540 - LPC10_SAMPLES_PER_FRAME - 229; i++)
|
|
594 s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i];
|
|
595 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 25; i++)
|
|
596 s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i];
|
|
597 for (i = 0, j = 0; i < s->osptr - 1; i++)
|
|
598 {
|
|
599 if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME)
|
|
600 s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME;
|
|
601 }
|
|
602 s->osptr = j + 1;
|
|
603 s->voibuf[0][0] = s->voibuf[1][0];
|
|
604 s->voibuf[0][1] = s->voibuf[1][1];
|
|
605 for (i = 0; i < 2; i++)
|
|
606 {
|
|
607 s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
|
|
608 s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
|
|
609 s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME;
|
|
610 s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME;
|
|
611 s->obound[i] = s->obound[i + 1];
|
|
612 s->voibuf[i + 1][0] = s->voibuf[i + 2][0];
|
|
613 s->voibuf[i + 1][1] = s->voibuf[i + 2][1];
|
|
614 s->rmsbuf[i] = s->rmsbuf[i + 1];
|
|
615 for (j = 0; j < LPC10_ORDER; j++)
|
|
616 s->rcbuf[i][j] = s->rcbuf[i + 1][j];
|
|
617 }
|
|
618 /* If the average value in the frame was over 1/4096 (after current
|
|
619 BIAS correction), then subtract that much more from samples in the
|
|
620 next frame. If the average value in the frame was under
|
|
621 -1/4096, add 1/4096 more to samples in next frame. In all other
|
|
622 cases, keep BIAS the same. */
|
|
623 temp = 0.0f;
|
|
624 for (i = 0; i < LPC10_SAMPLES_PER_FRAME; i++)
|
|
625 {
|
|
626 s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias;
|
|
627 temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i];
|
|
628 }
|
|
629 if (temp > (float) LPC10_SAMPLES_PER_FRAME)
|
|
630 s->bias++;
|
|
631 else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME))
|
|
632 s->bias--;
|
|
633 /* Place voicing window */
|
|
634 i = 721 - LPC10_SAMPLES_PER_FRAME;
|
|
635 s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre);
|
|
636 onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME);
|
|
637
|
|
638 lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462);
|
|
639 /* The Pitch Extraction algorithm estimates the pitch for a frame
|
|
640 of speech by locating the minimum of the average magnitude difference
|
|
641 function (AMDF). The AMDF operates on low-pass, inverse filtered
|
|
642 speech. (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR
|
|
643 filter and the inverse filter is a 2nd-order LPC filter.) The pitch
|
|
644 estimate is later refined by dynamic tracking. However, since some
|
|
645 of the tracking parameters are a function of the voicing decisions,
|
|
646 a voicing decision must precede the final pitch estimation. */
|
|
647 /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */
|
|
648 /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720
|
|
649 of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH
|
|
650 = 720 of LPBUF. */
|
|
651 lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME);
|
|
652 /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540
|
|
653 of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through
|
|
654 PWINH = 540 of IVBUF. */
|
|
655 ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc);
|
|
656 /* eval_highres_amdf reads indices PWINL = 229 through
|
|
657 (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes
|
|
658 indices 1 through LTAU = 60 of AMDF. */
|
|
659 eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau);
|
|
660 /* Voicing decisions are made for each half frame of input speech.
|
|
661 An initial voicing classification is made for each half of the
|
|
662 analysis frame, and the voicing decisions for the present frame
|
|
663 are finalized. See subroutine VOICIN. */
|
|
664 /* The voicing detector (VOICIN) classifies the input signal as
|
|
665 unvoiced (including silence) or voiced using the AMDF windowed
|
|
666 maximum-to-minimum ratio, the zero crossing rate, energy measures,
|
|
667 reflection coefficients, and prediction gains. */
|
|
668 /* The pitch and voicing rules apply smoothing and isolated
|
|
669 corrections to the pitch and voicing estimates and, in the process,
|
|
670 introduce two frames of delay into the corrected pitch estimates and
|
|
671 voicing decisions. */
|
|
672 for (half = 0; half < 2; half++)
|
|
673 {
|
|
674 lpc10_voicing(s,
|
|
675 &s->vwin[2][0],
|
|
676 s->inbuf,
|
|
677 s->lpbuf,
|
|
678 buflim,
|
|
679 half,
|
|
680 &amdf[minptr],
|
|
681 &amdf[maxptr],
|
|
682 &mintau,
|
|
683 ivrc,
|
|
684 s->obound);
|
|
685 }
|
|
686 /* Find the minimum cost pitch decision over several frames,
|
|
687 given the current voicing decision and the AMDF array */
|
|
688 minptr++;
|
|
689 dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx);
|
|
690 ipitch = tau[midx - 1];
|
|
691 /* Place spectrum analysis and energy windows */
|
|
692 lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156);
|
|
693 /* Remove short term DC bias over the analysis window. */
|
|
694 lanal = s->awin[2][1] + 1 - s->awin[2][0];
|
|
695 remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf);
|
|
696 /* Compute RMS over integer number of pitch periods within the analysis window. */
|
|
697 /* Note that in a hardware implementation this computation may be
|
|
698 simplified by using diagonal elements of phi computed by mload(). */
|
|
699 s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1);
|
|
700 /* Matrix load and invert, check RC's for stability */
|
|
701 mload(LPC10_ORDER, 1, lanal, abuf, phi, psi);
|
|
702 invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]);
|
|
703 rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]);
|
|
704 /* Set return parameters */
|
|
705 voice[0] = s->voibuf[1][0];
|
|
706 voice[1] = s->voibuf[1][1];
|
|
707 *rms = s->rmsbuf[0];
|
|
708 for (i = 0; i < LPC10_ORDER; i++)
|
|
709 rc[i] = s->rcbuf[0][i];
|
|
710 }
|
|
711 /*- End of function --------------------------------------------------------*/
|
|
712 /*- End of file ------------------------------------------------------------*/
|