Mercurial > hg > audiostuff
comparison spandsp-0.0.3/spandsp-0.0.3/src/lpc10_analyse.c @ 5:f762bf195c4b
import spandsp-0.0.3
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 16:00:21 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
4:26cd8f1ef0b1 | 5:f762bf195c4b |
---|---|
1 /* | |
2 * SpanDSP - a series of DSP components for telephony | |
3 * | |
4 * lpc10_analyse.c - LPC10 low bit rate speech codec. | |
5 * | |
6 * Written by Steve Underwood <steveu@coppice.org> | |
7 * | |
8 * Copyright (C) 2006 Steve Underwood | |
9 * | |
10 * All rights reserved. | |
11 * | |
12 * This program is free software; you can redistribute it and/or modify | |
13 * it under the terms of the GNU General Public License version 2, as | |
14 * published by the Free Software Foundation. | |
15 * | |
16 * This program is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 * GNU General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU General Public License | |
22 * along with this program; if not, write to the Free Software | |
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24 * | |
25 * This code is based on the U.S. Department of Defense reference | |
26 * implementation of the LPC-10 2400 bps Voice Coder. They do not | |
27 * exert copyright claims on their code, and it may be freely used. | |
28 * | |
29 * $Id: lpc10_analyse.c,v 1.12 2006/11/30 15:41:47 steveu Exp $ | |
30 */ | |
31 | |
32 #ifdef HAVE_CONFIG_H | |
33 #include <config.h> | |
34 #endif | |
35 | |
36 #include <stdlib.h> | |
37 #include <stdio.h> | |
38 #include <inttypes.h> | |
39 #include <memory.h> | |
40 #if defined(HAVE_TGMATH_H) | |
41 #include <tgmath.h> | |
42 #endif | |
43 #if defined(HAVE_MATH_H) | |
44 #include <math.h> | |
45 #endif | |
46 | |
47 #include "spandsp/telephony.h" | |
48 #include "spandsp/dc_restore.h" | |
49 #include "spandsp/lpc10.h" | |
50 | |
51 #include "lpc10_encdecs.h" | |
52 | |
53 static __inline__ float energyf(float amp[], int len) | |
54 { | |
55 int i; | |
56 float rms; | |
57 | |
58 rms = 0.0f; | |
59 for (i = 0; i < len; i++) | |
60 rms += amp[i]*amp[i]; | |
61 rms = sqrtf(rms/len); | |
62 return rms; | |
63 } | |
64 /*- End of function --------------------------------------------------------*/ | |
65 | |
66 static void remove_dc_bias(float speech[], int len, float sigout[]) | |
67 { | |
68 float bias; | |
69 int i; | |
70 | |
71 bias = 0.0f; | |
72 for (i = 0; i < len; i++) | |
73 bias += speech[i]; | |
74 bias /= len; | |
75 for (i = 0; i < len; i++) | |
76 sigout[i] = speech[i] - bias; | |
77 } | |
78 /*- End of function --------------------------------------------------------*/ | |
79 | |
80 static void eval_amdf(float speech[], | |
81 int32_t lpita, | |
82 const int32_t tau[], | |
83 int32_t ltau, | |
84 int32_t maxlag, | |
85 float amdf[], | |
86 int32_t *minptr, | |
87 int32_t *maxptr) | |
88 { | |
89 float sum; | |
90 int i; | |
91 int j; | |
92 int n1; | |
93 int n2; | |
94 | |
95 *minptr = 0; | |
96 *maxptr = 0; | |
97 for (i = 0; i < ltau; i++) | |
98 { | |
99 n1 = (maxlag - tau[i])/2 + 1; | |
100 n2 = n1 + lpita - 1; | |
101 sum = 0.0f; | |
102 for (j = n1; j <= n2; j += 4) | |
103 sum += fabsf(speech[j - 1] - speech[j + tau[i] - 1]); | |
104 amdf[i] = sum; | |
105 if (amdf[i] < amdf[*minptr]) | |
106 *minptr = i; | |
107 if (amdf[i] > amdf[*maxptr]) | |
108 *maxptr = i; | |
109 } | |
110 } | |
111 /*- End of function --------------------------------------------------------*/ | |
112 | |
113 static void eval_highres_amdf(float speech[], | |
114 int32_t lpita, | |
115 const int32_t tau[], | |
116 int32_t ltau, | |
117 float amdf[], | |
118 int32_t *minptr, | |
119 int32_t *maxptr, | |
120 int32_t *mintau) | |
121 { | |
122 float amdf2[6]; | |
123 int32_t tau2[6]; | |
124 int32_t minp2; | |
125 int32_t ltau2; | |
126 int32_t maxp2; | |
127 int32_t minamd; | |
128 int i; | |
129 int i2; | |
130 int ptr; | |
131 | |
132 /* Compute full AMDF using log spaced lags, find coarse minimum */ | |
133 eval_amdf(speech, lpita, tau, ltau, tau[ltau - 1], amdf, minptr, maxptr); | |
134 *mintau = tau[*minptr]; | |
135 minamd = (int32_t) amdf[*minptr]; | |
136 | |
137 /* Build table containing all lags within +/- 3 of the AMDF minimum, | |
138 excluding all that have already been computed */ | |
139 ltau2 = 0; | |
140 ptr = *minptr - 2; | |
141 i2 = min(*mintau + 4, tau[ltau - 1]); | |
142 for (i = max(*mintau - 3, 41); i < i2; i++) | |
143 { | |
144 while (tau[ptr] < i) | |
145 ptr++; | |
146 if (tau[ptr] != i) | |
147 tau2[ltau2++] = i; | |
148 } | |
149 /* Compute AMDF of the new lags, if there are any, and choose one | |
150 if it is better than the coarse minimum */ | |
151 if (ltau2 > 0) | |
152 { | |
153 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2); | |
154 if (amdf2[minp2] < (float) minamd) | |
155 { | |
156 *mintau = tau2[minp2]; | |
157 minamd = (int32_t) amdf2[minp2]; | |
158 } | |
159 } | |
160 /* Check one octave up, if there are any lags not yet computed */ | |
161 if (*mintau >= 80) | |
162 { | |
163 i = *mintau/2; | |
164 if ((i & 1) == 0) | |
165 { | |
166 ltau2 = 2; | |
167 tau2[0] = i - 1; | |
168 tau2[1] = i + 1; | |
169 } | |
170 else | |
171 { | |
172 ltau2 = 1; | |
173 tau2[0] = i; | |
174 } | |
175 eval_amdf(speech, lpita, tau2, ltau2, tau[ltau - 1], amdf2, &minp2, &maxp2); | |
176 if (amdf2[minp2] < (float) minamd) | |
177 { | |
178 *mintau = tau2[minp2]; | |
179 minamd = (int32_t) amdf2[minp2]; | |
180 *minptr -= 20; | |
181 } | |
182 } | |
183 /* Force minimum of the AMDF array to the high resolution minimum */ | |
184 amdf[*minptr] = (float) minamd; | |
185 /* Find maximum of AMDF within 1/2 octave of minimum */ | |
186 *maxptr = max(*minptr - 5, 0); | |
187 i2 = min(*minptr + 6, ltau); | |
188 for (i = *maxptr; i < i2; i++) | |
189 { | |
190 if (amdf[i] > amdf[*maxptr]) | |
191 *maxptr = i; | |
192 } | |
193 } | |
194 /*- End of function --------------------------------------------------------*/ | |
195 | |
196 static void dynamic_pitch_tracking(lpc10_encode_state_t *s, | |
197 float amdf[], | |
198 int32_t ltau, | |
199 int32_t *minptr, | |
200 int32_t voice, | |
201 int32_t *pitch, | |
202 int32_t *midx) | |
203 { | |
204 int32_t pbar; | |
205 float sbar; | |
206 int32_t path[2]; | |
207 int32_t i; | |
208 int32_t j; | |
209 float alpha; | |
210 float minsc; | |
211 float maxsc; | |
212 | |
213 /* Calculate the confidence factor ALPHA, used as a threshold slope in */ | |
214 /* SEESAW. If unvoiced, set high slope so that every point in P array */ | |
215 /*is marked as a potential pitch frequency. A scaled up version (ALPHAX )*/ | |
216 /* is used to maintain arithmetic precision. */ | |
217 if (voice == 1) | |
218 s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f; | |
219 else | |
220 s->alphax *= 0.984375f; | |
221 alpha = s->alphax/16; | |
222 if (voice == 0 && s->alphax < 128.0f) | |
223 alpha = 8.0f; | |
224 /* SEESAW: Construct a pitch pointer array and intermediate winner function */ | |
225 /* Left to right pass: */ | |
226 s->p[s->ipoint][0] = 1; | |
227 pbar = 1; | |
228 sbar = s->s[0]; | |
229 for (i = 0; i < ltau; i++) | |
230 { | |
231 sbar += alpha; | |
232 if (sbar < s->s[i]) | |
233 { | |
234 s->s[i] = sbar; | |
235 } | |
236 else | |
237 { | |
238 pbar = i + 1; | |
239 sbar = s->s[i]; | |
240 } | |
241 s->p[s->ipoint][i] = pbar; | |
242 } | |
243 /* Right to left pass: */ | |
244 sbar = s->s[pbar - 1]; | |
245 for (i = pbar - 2; i >= 0; i--) | |
246 { | |
247 sbar += alpha; | |
248 if (sbar < s->s[i]) | |
249 { | |
250 s->s[i] = sbar; | |
251 s->p[s->ipoint][i] = pbar; | |
252 } | |
253 else | |
254 { | |
255 pbar = s->p[s->ipoint][i]; | |
256 i = pbar - 1; | |
257 sbar = s->s[i]; | |
258 } | |
259 } | |
260 /* Update S using AMDF */ | |
261 /* Find maximum, minimum, and location of minimum */ | |
262 s->s[0] += amdf[0]/2; | |
263 minsc = s->s[0]; | |
264 maxsc = minsc; | |
265 *midx = 1; | |
266 for (i = 1; i < ltau; i++) | |
267 { | |
268 s->s[i] += amdf[i]/2; | |
269 if (s->s[i] > maxsc) | |
270 maxsc = s->s[i]; | |
271 if (s->s[i] < minsc) | |
272 { | |
273 *midx = i + 1; | |
274 minsc = s->s[i]; | |
275 } | |
276 } | |
277 /* Subtract MINSC from S to prevent overflow */ | |
278 for (i = 0; i < ltau; i++) | |
279 s->s[i] -= minsc; | |
280 maxsc -= minsc; | |
281 /* Use higher octave pitch if significant null there */ | |
282 j = 0; | |
283 for (i = 20; i <= 40; i += 10) | |
284 { | |
285 if (*midx > i) | |
286 { | |
287 if (s->s[*midx - i - 1] < maxsc / 4) | |
288 j = i; | |
289 } | |
290 } | |
291 *midx -= j; | |
292 /* TRACE: look back two frames to find minimum cost pitch estimate */ | |
293 *pitch = *midx; | |
294 for (i = 0, j = s->ipoint; i < 2; i++, j++) | |
295 { | |
296 *pitch = s->p[j & 1][*pitch - 1]; | |
297 path[i] = *pitch; | |
298 } | |
299 | |
300 /* The following statement subtracts one from IPOINT, mod DEPTH. I */ | |
301 /* think the author chose to add DEPTH-1, instead of subtracting 1, */ | |
302 /* because then it will work even if MOD doesn't work as desired on */ | |
303 /* negative arguments. */ | |
304 s->ipoint = (s->ipoint + 1) & 1; | |
305 } | |
306 /*- End of function --------------------------------------------------------*/ | |
307 | |
308 /* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */ | |
309 static void onset(lpc10_encode_state_t *s, | |
310 float *pebuf, | |
311 int32_t osbuf[], | |
312 int32_t *osptr, | |
313 int32_t oslen, | |
314 int32_t sbufl, | |
315 int32_t sbufh, | |
316 int32_t lframe) | |
317 { | |
318 int32_t i; | |
319 float r1; | |
320 float l2sum2; | |
321 | |
322 pebuf -= sbufl; | |
323 | |
324 if (s->hyst) | |
325 s->lasti -= lframe; | |
326 for (i = sbufh - lframe + 1; i <= sbufh; i++) | |
327 { | |
328 /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */ | |
329 s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f; | |
330 /* Computing 2nd power */ | |
331 r1 = pebuf[i - 1]; | |
332 s->d__ = (r1*r1 + s->d__*63.0f)/64.0f; | |
333 if (s->d__ != 0.0f) | |
334 { | |
335 if (fabsf(s->n) > s->d__) | |
336 s->fpc = r_sign(1.0f, s->n); | |
337 else | |
338 s->fpc = s->n/s->d__; | |
339 } | |
340 /* Filter FPC */ | |
341 l2sum2 = s->l2buf[s->l2ptr1 - 1]; | |
342 s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc; | |
343 s->l2buf[s->l2ptr2 - 1] = s->l2sum1; | |
344 s->l2buf[s->l2ptr1 - 1] = s->fpc; | |
345 s->l2ptr1 = (s->l2ptr1 & 0xF) + 1; | |
346 s->l2ptr2 = (s->l2ptr2 & 0xF) + 1; | |
347 if (fabsf(s->l2sum1 - l2sum2) > 1.7f) | |
348 { | |
349 if (!s->hyst) | |
350 { | |
351 /* Ignore if buffer full */ | |
352 if (*osptr <= oslen) | |
353 { | |
354 osbuf[*osptr - 1] = i - 9; | |
355 (*osptr)++; | |
356 } | |
357 s->hyst = TRUE; | |
358 } | |
359 s->lasti = i; | |
360 /* After one onset detection, at least OSHYST sample times must go */ | |
361 /* by before another is allowed to occur. */ | |
362 } | |
363 else if (s->hyst && i - s->lasti >= 10) | |
364 { | |
365 s->hyst = FALSE; | |
366 } | |
367 } | |
368 } | |
369 /*- End of function --------------------------------------------------------*/ | |
370 | |
371 /* Load a covariance matrix. */ | |
372 static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[]) | |
373 { | |
374 int32_t start; | |
375 int i; | |
376 int r; | |
377 | |
378 start = awins + order; | |
379 for (r = 1; r <= order; r++) | |
380 { | |
381 phi[r - 1] = 0.0f; | |
382 for (i = start; i <= awinf; i++) | |
383 phi[r - 1] += speech[i - 2]*speech[i - r - 1]; | |
384 } | |
385 | |
386 /* Load last element of vector PSI */ | |
387 psi[order - 1] = 0.0f; | |
388 for (i = start - 1; i < awinf; i++) | |
389 psi[order - 1] += speech[i]*speech[i - order]; | |
390 /* End correct to get additional columns of phi */ | |
391 for (r = 1; r < order; r++) | |
392 { | |
393 for (i = 1; i <= r; i++) | |
394 { | |
395 phi[i*order + r] = phi[(i - 1)*order + r - 1] | |
396 - speech[awinf - (r + 1)]*speech[awinf - (i + 1)] | |
397 + speech[start - (r + 2)]*speech[start - (i + 2)]; | |
398 } | |
399 } | |
400 /* End correct to get additional elements of PSI */ | |
401 for (i = 0; i < order - 1; i++) | |
402 { | |
403 psi[i] = phi[i + 1] | |
404 - speech[start - 2]*speech[start - i - 3] | |
405 + speech[awinf - 1]*speech[awinf - i - 2]; | |
406 } | |
407 } | |
408 /*- End of function --------------------------------------------------------*/ | |
409 | |
410 /* Preemphasize speech with a single-zero filter. */ | |
411 /* (When coef = .9375, preemphasis is as in LPC43.) */ | |
412 static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z) | |
413 { | |
414 float temp; | |
415 int i; | |
416 | |
417 for (i = 0; i < nsamp; i++) | |
418 { | |
419 temp = inbuf[i] - coeff*z; | |
420 z = inbuf[i]; | |
421 pebuf[i] = temp; | |
422 } | |
423 return z; | |
424 } | |
425 /*- End of function --------------------------------------------------------*/ | |
426 | |
427 /* Invert a covariance matrix using Choleski decomposition method. */ | |
428 static void invert(int32_t order, float phi[], float psi[], float rc[]) | |
429 { | |
430 float r1; | |
431 int32_t i; | |
432 int32_t j; | |
433 int32_t k; | |
434 float v[10][10]; | |
435 | |
436 for (j = 0; j < order; j++) | |
437 { | |
438 for (i = j; i < order; i++) | |
439 v[j][i] = phi[i + j*order]; | |
440 for (k = 0; k < j; k++) | |
441 { | |
442 r1 = v[k][j]*v[k][k]; | |
443 for (i = j; i <= order; i++) | |
444 v[j][i] -= v[k][i]*r1; | |
445 } | |
446 /* Compute intermediate results, which are similar to RC's */ | |
447 if (fabsf(v[j][j]) < 1.0e-10f) | |
448 { | |
449 for (i = j; i < order; i++) | |
450 rc[i] = 0.0f; | |
451 return; | |
452 } | |
453 rc[j] = psi[j]; | |
454 for (k = 0; k < j; k++) | |
455 rc[j] -= rc[k]*v[k][j]; | |
456 v[j][j] = 1.0f/v[j][j]; | |
457 rc[j] *= v[j][j]; | |
458 r1 = min(rc[j], 0.999f); | |
459 rc[j] = max(r1, -0.999f); | |
460 } | |
461 } | |
462 /*- End of function --------------------------------------------------------*/ | |
463 | |
464 /* Check RC's, repeat previous frame's RC's if unstable */ | |
465 static int rcchk(int order, float rc1f[], float rc2f[]) | |
466 { | |
467 int i; | |
468 | |
469 for (i = 0; i < order; i++) | |
470 { | |
471 if (fabsf(rc2f[i]) > 0.99f) | |
472 { | |
473 for (i = 0; i < order; i++) | |
474 rc2f[i] = rc1f[i]; | |
475 break; | |
476 } | |
477 } | |
478 return 0; | |
479 } | |
480 /*- End of function --------------------------------------------------------*/ | |
481 | |
482 static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp) | |
483 { | |
484 int32_t j; | |
485 float t; | |
486 | |
487 /* 31 point equiripple FIR LPF */ | |
488 /* Linear phase, delay = 15 samples */ | |
489 /* Passband: ripple = 0.25 dB, cutoff = 800 Hz */ | |
490 /* Stopband: atten. = 40. dB, cutoff = 1240 Hz */ | |
491 | |
492 for (j = len - nsamp; j < len; j++) | |
493 { | |
494 t = (inbuf[j] + inbuf[j - 30]) * -0.0097201988f; | |
495 t += (inbuf[j - 1] + inbuf[j - 29]) * -0.0105179986f; | |
496 t += (inbuf[j - 2] + inbuf[j - 28]) * -0.0083479648f; | |
497 t += (inbuf[j - 3] + inbuf[j - 27]) * 5.860774e-4f; | |
498 t += (inbuf[j - 4] + inbuf[j - 26]) * 0.0130892089f; | |
499 t += (inbuf[j - 5] + inbuf[j - 25]) * 0.0217052232f; | |
500 t += (inbuf[j - 6] + inbuf[j - 24]) * 0.0184161253f; | |
501 t += (inbuf[j - 7] + inbuf[j - 23]) * 3.39723e-4f; | |
502 t += (inbuf[j - 8] + inbuf[j - 22]) * -0.0260797087f; | |
503 t += (inbuf[j - 9] + inbuf[j - 21]) * -0.0455563702f; | |
504 t += (inbuf[j - 10] + inbuf[j - 20]) * -0.040306855f; | |
505 t += (inbuf[j - 11] + inbuf[j - 19]) * 5.029835e-4f; | |
506 t += (inbuf[j - 12] + inbuf[j - 18]) * 0.0729262903f; | |
507 t += (inbuf[j - 13] + inbuf[j - 17]) * 0.1572008878f; | |
508 t += (inbuf[j - 14] + inbuf[j - 16]) * 0.2247288674f; | |
509 t += inbuf[j - 15] * 0.250535965f; | |
510 lpbuf[j] = t; | |
511 } | |
512 } | |
513 /*- End of function --------------------------------------------------------*/ | |
514 | |
515 /* 2nd order inverse filter, speech is decimated 4:1 */ | |
516 static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[]) | |
517 { | |
518 int32_t i; | |
519 int32_t j; | |
520 int32_t k; | |
521 float r[3]; | |
522 float pc1; | |
523 float pc2; | |
524 | |
525 /* Calculate autocorrelations */ | |
526 for (i = 1; i <= 3; i++) | |
527 { | |
528 r[i - 1] = 0.0f; | |
529 k = (i - 1) << 2; | |
530 for (j = (i << 2) + len - nsamp; j <= len; j += 2) | |
531 r[i - 1] += lpbuf[j - 1]*lpbuf[j - k - 1]; | |
532 } | |
533 /* Calculate predictor coefficients */ | |
534 pc1 = 0.0f; | |
535 pc2 = 0.0f; | |
536 ivrc[0] = 0.0f; | |
537 ivrc[1] = 0.0f; | |
538 if (r[0] > 1.0e-10f) | |
539 { | |
540 ivrc[0] = r[1]/r[0]; | |
541 ivrc[1] = (r[2] - ivrc[0]*r[1])/(r[0] - ivrc[0]*r[1]); | |
542 pc1 = ivrc[0] - ivrc[0]*ivrc[1]; | |
543 pc2 = ivrc[1]; | |
544 } | |
545 /* Inverse filter LPBUF into IVBUF */ | |
546 for (i = len - nsamp; i < len; i++) | |
547 ivbuf[i] = lpbuf[i] - pc1*lpbuf[i - 4] - pc2*lpbuf[i - 8]; | |
548 } | |
549 /*- End of function --------------------------------------------------------*/ | |
550 | |
551 void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[]) | |
552 { | |
553 static const int32_t tau[60] = | |
554 { | |
555 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | |
556 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, | |
557 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96, | |
558 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, | |
559 148, 152, 156 | |
560 }; | |
561 static const int32_t buflim[4] = | |
562 { | |
563 181, 720, 25, 720 | |
564 }; | |
565 static const float precoef = 0.9375f; | |
566 | |
567 float amdf[60]; | |
568 float abuf[156]; | |
569 float ivrc[2]; | |
570 float temp; | |
571 float phi[100] /* was [10][10] */; | |
572 float psi[10]; | |
573 int32_t half; | |
574 int32_t midx; | |
575 int32_t ewin[3][2]; | |
576 int32_t i; | |
577 int32_t j; | |
578 int32_t lanal; | |
579 int32_t ipitch; | |
580 int32_t mintau; | |
581 int32_t minptr; | |
582 int32_t maxptr; | |
583 | |
584 /* Calculations are done on future frame due to requirements | |
585 of the pitch tracker. Delay RMS and RC's 2 frames to give | |
586 current frame parameters on return. */ | |
587 | |
588 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 181; i++) | |
589 { | |
590 s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
591 s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i]; | |
592 } | |
593 for (i = 0; i <= 540 - LPC10_SAMPLES_PER_FRAME - 229; i++) | |
594 s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
595 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 25; i++) | |
596 s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
597 for (i = 0, j = 0; i < s->osptr - 1; i++) | |
598 { | |
599 if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME) | |
600 s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME; | |
601 } | |
602 s->osptr = j + 1; | |
603 s->voibuf[0][0] = s->voibuf[1][0]; | |
604 s->voibuf[0][1] = s->voibuf[1][1]; | |
605 for (i = 0; i < 2; i++) | |
606 { | |
607 s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME; | |
608 s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME; | |
609 s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME; | |
610 s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME; | |
611 s->obound[i] = s->obound[i + 1]; | |
612 s->voibuf[i + 1][0] = s->voibuf[i + 2][0]; | |
613 s->voibuf[i + 1][1] = s->voibuf[i + 2][1]; | |
614 s->rmsbuf[i] = s->rmsbuf[i + 1]; | |
615 for (j = 0; j < LPC10_ORDER; j++) | |
616 s->rcbuf[i][j] = s->rcbuf[i + 1][j]; | |
617 } | |
618 /* If the average value in the frame was over 1/4096 (after current | |
619 BIAS correction), then subtract that much more from samples in the | |
620 next frame. If the average value in the frame was under | |
621 -1/4096, add 1/4096 more to samples in next frame. In all other | |
622 cases, keep BIAS the same. */ | |
623 temp = 0.0f; | |
624 for (i = 0; i < LPC10_SAMPLES_PER_FRAME; i++) | |
625 { | |
626 s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias; | |
627 temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i]; | |
628 } | |
629 if (temp > (float) LPC10_SAMPLES_PER_FRAME) | |
630 s->bias++; | |
631 else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME)) | |
632 s->bias--; | |
633 /* Place voicing window */ | |
634 i = 721 - LPC10_SAMPLES_PER_FRAME; | |
635 s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre); | |
636 onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME); | |
637 | |
638 lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462); | |
639 /* The Pitch Extraction algorithm estimates the pitch for a frame | |
640 of speech by locating the minimum of the average magnitude difference | |
641 function (AMDF). The AMDF operates on low-pass, inverse filtered | |
642 speech. (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR | |
643 filter and the inverse filter is a 2nd-order LPC filter.) The pitch | |
644 estimate is later refined by dynamic tracking. However, since some | |
645 of the tracking parameters are a function of the voicing decisions, | |
646 a voicing decision must precede the final pitch estimation. */ | |
647 /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */ | |
648 /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720 | |
649 of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH | |
650 = 720 of LPBUF. */ | |
651 lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME); | |
652 /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540 | |
653 of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through | |
654 PWINH = 540 of IVBUF. */ | |
655 ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc); | |
656 /* eval_highres_amdf reads indices PWINL = 229 through | |
657 (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes | |
658 indices 1 through LTAU = 60 of AMDF. */ | |
659 eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau); | |
660 /* Voicing decisions are made for each half frame of input speech. | |
661 An initial voicing classification is made for each half of the | |
662 analysis frame, and the voicing decisions for the present frame | |
663 are finalized. See subroutine VOICIN. */ | |
664 /* The voicing detector (VOICIN) classifies the input signal as | |
665 unvoiced (including silence) or voiced using the AMDF windowed | |
666 maximum-to-minimum ratio, the zero crossing rate, energy measures, | |
667 reflection coefficients, and prediction gains. */ | |
668 /* The pitch and voicing rules apply smoothing and isolated | |
669 corrections to the pitch and voicing estimates and, in the process, | |
670 introduce two frames of delay into the corrected pitch estimates and | |
671 voicing decisions. */ | |
672 for (half = 0; half < 2; half++) | |
673 { | |
674 lpc10_voicing(s, | |
675 &s->vwin[2][0], | |
676 s->inbuf, | |
677 s->lpbuf, | |
678 buflim, | |
679 half, | |
680 &amdf[minptr], | |
681 &amdf[maxptr], | |
682 &mintau, | |
683 ivrc, | |
684 s->obound); | |
685 } | |
686 /* Find the minimum cost pitch decision over several frames, | |
687 given the current voicing decision and the AMDF array */ | |
688 minptr++; | |
689 dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx); | |
690 ipitch = tau[midx - 1]; | |
691 /* Place spectrum analysis and energy windows */ | |
692 lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156); | |
693 /* Remove short term DC bias over the analysis window. */ | |
694 lanal = s->awin[2][1] + 1 - s->awin[2][0]; | |
695 remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf); | |
696 /* Compute RMS over integer number of pitch periods within the analysis window. */ | |
697 /* Note that in a hardware implementation this computation may be | |
698 simplified by using diagonal elements of phi computed by mload(). */ | |
699 s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1); | |
700 /* Matrix load and invert, check RC's for stability */ | |
701 mload(LPC10_ORDER, 1, lanal, abuf, phi, psi); | |
702 invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]); | |
703 rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]); | |
704 /* Set return parameters */ | |
705 voice[0] = s->voibuf[1][0]; | |
706 voice[1] = s->voibuf[1][1]; | |
707 *rms = s->rmsbuf[0]; | |
708 for (i = 0; i < LPC10_ORDER; i++) | |
709 rc[i] = s->rcbuf[0][i]; | |
710 } | |
711 /*- End of function --------------------------------------------------------*/ | |
712 /*- End of file ------------------------------------------------------------*/ |