Mercurial > hg > audiostuff
comparison spandsp-0.0.6pre17/src/lpc10_analyse.c @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 15:50:58 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:c6c5a16ce2f2 | 4:26cd8f1ef0b1 |
---|---|
1 /* | |
2 * SpanDSP - a series of DSP components for telephony | |
3 * | |
4 * lpc10_analyse.c - LPC10 low bit rate speech codec. | |
5 * | |
6 * Written by Steve Underwood <steveu@coppice.org> | |
7 * | |
8 * Copyright (C) 2006 Steve Underwood | |
9 * | |
10 * All rights reserved. | |
11 * | |
12 * This program is free software; you can redistribute it and/or modify | |
13 * it under the terms of the GNU Lesser General Public License version 2.1, | |
14 * as published by the Free Software Foundation. | |
15 * | |
16 * This program is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 * GNU Lesser General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU Lesser General Public | |
22 * License along with this program; if not, write to the Free Software | |
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24 * | |
25 * This code is based on the U.S. Department of Defense reference | |
26 * implementation of the LPC-10 2400 bps Voice Coder. They do not | |
27 * exert copyright claims on their code, and it may be freely used. | |
28 * | |
29 * $Id: lpc10_analyse.c,v 1.22 2009/01/28 03:41:27 steveu Exp $ | |
30 */ | |
31 | |
32 #if defined(HAVE_CONFIG_H) | |
33 #include "config.h" | |
34 #endif | |
35 | |
36 #include <stdlib.h> | |
37 #include <stdio.h> | |
38 #include <inttypes.h> | |
39 #include <memory.h> | |
40 #if defined(HAVE_TGMATH_H) | |
41 #include <tgmath.h> | |
42 #endif | |
43 #if defined(HAVE_MATH_H) | |
44 #include <math.h> | |
45 #endif | |
46 #include "floating_fudge.h" | |
47 | |
48 #include "spandsp/telephony.h" | |
49 #include "spandsp/dc_restore.h" | |
50 #include "spandsp/lpc10.h" | |
51 #include "spandsp/private/lpc10.h" | |
52 | |
53 #include "lpc10_encdecs.h" | |
54 | |
55 static __inline__ float energyf(float amp[], int len) | |
56 { | |
57 int i; | |
58 float rms; | |
59 | |
60 rms = 0.0f; | |
61 for (i = 0; i < len; i++) | |
62 rms += amp[i]*amp[i]; | |
63 rms = sqrtf(rms/len); | |
64 return rms; | |
65 } | |
66 /*- End of function --------------------------------------------------------*/ | |
67 | |
/* Subtract the mean of speech[0..len-1] from every sample, writing the
   result to sigout[0..len-1]. */
static void remove_dc_bias(float speech[], int len, float sigout[])
{
    float mean;
    int k;

    /* First pass: average of the block */
    mean = 0.0f;
    k = 0;
    while (k < len)
        mean += speech[k++];
    mean /= len;

    /* Second pass: remove that average from each sample */
    k = 0;
    while (k < len)
    {
        sigout[k] = speech[k] - mean;
        k++;
    }
}
80 /*- End of function --------------------------------------------------------*/ | |
81 | |
/* Evaluate the average magnitude difference function (AMDF) of speech[]
   for each of the ltau lags listed in tau[].

   For each lag the window is positioned from maxlag and that lag, spans
   lpita positions, and is sampled at every 4th position. The per-lag sums
   are written to amdf[0..ltau-1]. On return *minptr and *maxptr hold the
   0-based indices of the smallest and largest entries of amdf[]. */
static void eval_amdf(float speech[],
                      int32_t lpita,
                      const int32_t tau[],
                      int32_t ltau,
                      int32_t maxlag,
                      float amdf[],
                      int32_t *minptr,
                      int32_t *maxptr)
{
    int lag_idx;

    *minptr = 0;
    *maxptr = 0;
    for (lag_idx = 0;  lag_idx < ltau;  lag_idx++)
    {
        const int lag = tau[lag_idx];
        /* 0-based first position and exclusive end of the window */
        const int first = (maxlag - lag)/2;
        const int end = first + lpita;
        float acc = 0.0f;
        int pos;

        for (pos = first;  pos < end;  pos += 4)
            acc += fabsf(speech[pos] - speech[pos + lag]);
        amdf[lag_idx] = acc;

        /* Track the extremes seen so far */
        if (amdf[lag_idx] < amdf[*minptr])
            *minptr = lag_idx;
        if (amdf[lag_idx] > amdf[*maxptr])
            *maxptr = lag_idx;
    }
}
113 /*- End of function --------------------------------------------------------*/ | |
114 | |
/* Coarse-to-fine AMDF minimum search.

   1. Evaluate the AMDF at the ltau lags in tau[] and take the coarse minimum.
   2. Evaluate the lags within +/- 3 of that minimum which are not in tau[]
      (only lags of 41 and above, and below the largest lag in tau[]).
   3. If the best lag is 80 or more, also try the lag(s) at half that value.

   On return *mintau is the best lag found, amdf[*minptr] has been forced to
   the (integer truncated) best AMDF value, and *maxptr is the index of the
   largest AMDF entry in the range *minptr - 5 .. *minptr + 5, clipped to the
   table. */
static void eval_highres_amdf(float speech[],
                              int32_t lpita,
                              const int32_t tau[],
                              int32_t ltau,
                              float amdf[],
                              int32_t *minptr,
                              int32_t *maxptr,
                              int32_t *mintau)
{
    float fine_amdf[6];
    int32_t fine_tau[6];
    int32_t fine_count;
    int32_t fine_min;
    int32_t fine_max;
    int32_t best_amdf;
    int lag;
    int limit;
    int cursor;
    int k;

    /* Coarse search over the supplied lag table */
    eval_amdf(speech, lpita, tau, ltau, tau[ltau - 1], amdf, minptr, maxptr);
    *mintau = tau[*minptr];
    best_amdf = (int32_t) amdf[*minptr];

    /* Collect the neighbouring lags that the coarse table skipped. The
       cursor walks forward through tau[] in step with the candidate lag. */
    fine_count = 0;
    cursor = *minptr - 2;
    limit = min(*mintau + 4, tau[ltau - 1]);
    for (lag = max(*mintau - 3, 41);  lag < limit;  lag++)
    {
        while (tau[cursor] < lag)
            cursor++;
        if (tau[cursor] != lag)
            fine_tau[fine_count++] = lag;
    }

    /* Evaluate any such lags and adopt the best one if it beats the
       coarse minimum */
    if (fine_count > 0)
    {
        eval_amdf(speech, lpita, fine_tau, fine_count, tau[ltau - 1], fine_amdf, &fine_min, &fine_max);
        if (fine_amdf[fine_min] < (float) best_amdf)
        {
            *mintau = fine_tau[fine_min];
            best_amdf = (int32_t) fine_amdf[fine_min];
        }
    }

    /* Look one octave up, at the lag(s) around half the current best */
    if (*mintau >= 80)
    {
        lag = *mintau/2;
        if (lag & 1)
        {
            fine_count = 1;
            fine_tau[0] = lag;
        }
        else
        {
            fine_count = 2;
            fine_tau[0] = lag - 1;
            fine_tau[1] = lag + 1;
        }
        eval_amdf(speech, lpita, fine_tau, fine_count, tau[ltau - 1], fine_amdf, &fine_min, &fine_max);
        if (fine_amdf[fine_min] < (float) best_amdf)
        {
            *mintau = fine_tau[fine_min];
            best_amdf = (int32_t) fine_amdf[fine_min];
            *minptr -= 20;
        }
    }

    /* Force the table minimum to the high resolution minimum */
    amdf[*minptr] = (float) best_amdf;

    /* Locate the largest AMDF value near the minimum */
    *maxptr = max(*minptr - 5, 0);
    limit = min(*minptr + 6, ltau);
    for (k = *maxptr;  k < limit;  k++)
    {
        if (amdf[k] > amdf[*maxptr])
            *maxptr = k;
    }
}
196 /*- End of function --------------------------------------------------------*/ | |
197 | |
198 static void dynamic_pitch_tracking(lpc10_encode_state_t *s, | |
199 float amdf[], | |
200 int32_t ltau, | |
201 int32_t *minptr, | |
202 int32_t voice, | |
203 int32_t *pitch, | |
204 int32_t *midx) | |
205 { | |
206 int32_t pbar; | |
207 float sbar; | |
208 int32_t path[2]; | |
209 int32_t i; | |
210 int32_t j; | |
211 float alpha; | |
212 float minsc; | |
213 float maxsc; | |
214 | |
215 /* Calculate the confidence factor ALPHA, used as a threshold slope in */ | |
216 /* SEESAW. If unvoiced, set high slope so that every point in P array */ | |
217 /*is marked as a potential pitch frequency. A scaled up version (ALPHAX )*/ | |
218 /* is used to maintain arithmetic precision. */ | |
219 if (voice == 1) | |
220 s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f; | |
221 else | |
222 s->alphax *= 0.984375f; | |
223 alpha = s->alphax/16; | |
224 if (voice == 0 && s->alphax < 128.0f) | |
225 alpha = 8.0f; | |
226 /* SEESAW: Construct a pitch pointer array and intermediate winner function */ | |
227 /* Left to right pass: */ | |
228 s->p[s->ipoint][0] = 1; | |
229 pbar = 1; | |
230 sbar = s->s[0]; | |
231 for (i = 0; i < ltau; i++) | |
232 { | |
233 sbar += alpha; | |
234 if (sbar < s->s[i]) | |
235 { | |
236 s->s[i] = sbar; | |
237 } | |
238 else | |
239 { | |
240 pbar = i + 1; | |
241 sbar = s->s[i]; | |
242 } | |
243 s->p[s->ipoint][i] = pbar; | |
244 } | |
245 /* Right to left pass: */ | |
246 sbar = s->s[pbar - 1]; | |
247 for (i = pbar - 2; i >= 0; i--) | |
248 { | |
249 sbar += alpha; | |
250 if (sbar < s->s[i]) | |
251 { | |
252 s->s[i] = sbar; | |
253 s->p[s->ipoint][i] = pbar; | |
254 } | |
255 else | |
256 { | |
257 pbar = s->p[s->ipoint][i]; | |
258 i = pbar - 1; | |
259 sbar = s->s[i]; | |
260 } | |
261 } | |
262 /* Update S using AMDF */ | |
263 /* Find maximum, minimum, and location of minimum */ | |
264 s->s[0] += amdf[0]/2; | |
265 minsc = s->s[0]; | |
266 maxsc = minsc; | |
267 *midx = 1; | |
268 for (i = 1; i < ltau; i++) | |
269 { | |
270 s->s[i] += amdf[i]/2; | |
271 if (s->s[i] > maxsc) | |
272 maxsc = s->s[i]; | |
273 if (s->s[i] < minsc) | |
274 { | |
275 *midx = i + 1; | |
276 minsc = s->s[i]; | |
277 } | |
278 } | |
279 /* Subtract MINSC from S to prevent overflow */ | |
280 for (i = 0; i < ltau; i++) | |
281 s->s[i] -= minsc; | |
282 maxsc -= minsc; | |
283 /* Use higher octave pitch if significant null there */ | |
284 j = 0; | |
285 for (i = 20; i <= 40; i += 10) | |
286 { | |
287 if (*midx > i) | |
288 { | |
289 if (s->s[*midx - i - 1] < maxsc / 4) | |
290 j = i; | |
291 } | |
292 } | |
293 *midx -= j; | |
294 /* TRACE: look back two frames to find minimum cost pitch estimate */ | |
295 *pitch = *midx; | |
296 for (i = 0, j = s->ipoint; i < 2; i++, j++) | |
297 { | |
298 *pitch = s->p[j & 1][*pitch - 1]; | |
299 path[i] = *pitch; | |
300 } | |
301 | |
302 /* The following statement subtracts one from IPOINT, mod DEPTH. I */ | |
303 /* think the author chose to add DEPTH-1, instead of subtracting 1, */ | |
304 /* because then it will work even if MOD doesn't work as desired on */ | |
305 /* negative arguments. */ | |
306 s->ipoint = (s->ipoint + 1) & 1; | |
307 } | |
308 /*- End of function --------------------------------------------------------*/ | |
309 | |
310 /* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */ | |
311 static void onset(lpc10_encode_state_t *s, | |
312 float *pebuf, | |
313 int32_t osbuf[], | |
314 int32_t *osptr, | |
315 int32_t oslen, | |
316 int32_t sbufl, | |
317 int32_t sbufh, | |
318 int32_t lframe) | |
319 { | |
320 int32_t i; | |
321 float r1; | |
322 float l2sum2; | |
323 | |
324 pebuf -= sbufl; | |
325 | |
326 if (s->hyst) | |
327 s->lasti -= lframe; | |
328 for (i = sbufh - lframe + 1; i <= sbufh; i++) | |
329 { | |
330 /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */ | |
331 s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f; | |
332 /* Computing 2nd power */ | |
333 r1 = pebuf[i - 1]; | |
334 s->d__ = (r1*r1 + s->d__*63.0f)/64.0f; | |
335 if (s->d__ != 0.0f) | |
336 { | |
337 if (fabsf(s->n) > s->d__) | |
338 s->fpc = r_sign(1.0f, s->n); | |
339 else | |
340 s->fpc = s->n/s->d__; | |
341 } | |
342 /* Filter FPC */ | |
343 l2sum2 = s->l2buf[s->l2ptr1 - 1]; | |
344 s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc; | |
345 s->l2buf[s->l2ptr2 - 1] = s->l2sum1; | |
346 s->l2buf[s->l2ptr1 - 1] = s->fpc; | |
347 s->l2ptr1 = (s->l2ptr1 & 0xF) + 1; | |
348 s->l2ptr2 = (s->l2ptr2 & 0xF) + 1; | |
349 if (fabsf(s->l2sum1 - l2sum2) > 1.7f) | |
350 { | |
351 if (!s->hyst) | |
352 { | |
353 /* Ignore if buffer full */ | |
354 if (*osptr <= oslen) | |
355 { | |
356 osbuf[*osptr - 1] = i - 9; | |
357 (*osptr)++; | |
358 } | |
359 s->hyst = TRUE; | |
360 } | |
361 s->lasti = i; | |
362 /* After one onset detection, at least OSHYST sample times must go */ | |
363 /* by before another is allowed to occur. */ | |
364 } | |
365 else if (s->hyst && i - s->lasti >= 10) | |
366 { | |
367 s->hyst = FALSE; | |
368 } | |
369 } | |
370 } | |
371 /*- End of function --------------------------------------------------------*/ | |
372 | |
/* Load a covariance matrix.

   order   Order of the analysis; phi is treated as order x order, psi as
           order elements.
   awins   1-based number of the first sample of the analysis window.
   awinf   1-based number of the last sample of the analysis window.
   speech  Input samples (0-based array).
   phi     Output matrix, addressed as phi[row*order + col]. The first row
           and the entries with row <= col are written.
   psi     Output vector. */
static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[])
{
    const int32_t first = awins + order;
    int row;
    int col;
    int n;

    /* First row of phi, summed directly over the window */
    for (col = 0;  col < order;  col++)
    {
        phi[col] = 0.0f;
        for (n = first - 2;  n <= awinf - 2;  n++)
            phi[col] += speech[n]*speech[n - col];
    }

    /* Last element of psi, also summed directly */
    psi[order - 1] = 0.0f;
    for (n = first - 1;  n < awinf;  n++)
        psi[order - 1] += speech[n]*speech[n - order];

    /* Remaining entries of phi: take the entry one step up the diagonal
       and correct it for the samples at the two ends of the window */
    for (col = 1;  col < order;  col++)
    {
        for (row = 1;  row <= col;  row++)
        {
            phi[row*order + col] = phi[(row - 1)*order + col - 1]
                                 - speech[awinf - (col + 1)]*speech[awinf - (row + 1)]
                                 + speech[first - (col + 2)]*speech[first - (row + 2)];
        }
    }

    /* Remaining entries of psi, end corrected from the first row of phi */
    for (n = 0;  n < order - 1;  n++)
    {
        psi[n] = phi[n + 1]
               - speech[first - 2]*speech[first - n - 3]
               + speech[awinf - 1]*speech[awinf - n - 2];
    }
}
410 /*- End of function --------------------------------------------------------*/ | |
411 | |
/* Preemphasize speech with a single-zero filter:
       pebuf[n] = inbuf[n] - coeff*inbuf[n - 1]
   where z supplies the sample preceding inbuf[0]. The last input sample is
   returned so it can be passed as z for the next block.
   (When coef = .9375, preemphasis is as in LPC43.) */
static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z)
{
    float x;
    int k;

    k = 0;
    while (k < nsamp)
    {
        x = inbuf[k];
        pebuf[k] = x - coeff*z;
        z = x;
        k++;
    }
    return z;
}
427 /*- End of function --------------------------------------------------------*/ | |
428 | |
/* Invert a covariance matrix using Choleski decomposition method.

   order  Order of the system. The work array is 10 x 10, so order must not
          exceed 10.
   phi    Covariance matrix, addressed as phi[col + row*order]; only entries
          with col >= row are read.
   psi    Right hand side vector, order elements.
   rc     Output, order elements, each clamped to +/- 0.999.

   If a pivot is (nearly) zero, the remaining outputs from that position
   onwards are set to zero and the routine returns early. */
static void invert(int32_t order, float phi[], float psi[], float rc[])
{
    float r1;
    int32_t i;
    int32_t j;
    int32_t k;
    float v[10][10];

    for (j = 0;  j < order;  j++)
    {
        for (i = j;  i < order;  i++)
            v[j][i] = phi[i + j*order];
        for (k = 0;  k < j;  k++)
        {
            /* v[k][k] already holds the reciprocal of the k'th pivot */
            r1 = v[k][j]*v[k][k];
            /* Only columns j..order-1 exist in this row. Running to
               i == order would touch v[j][order], which for the last row
               of a 10th order system lies beyond the end of v[][]. */
            for (i = j;  i < order;  i++)
                v[j][i] -= v[k][i]*r1;
        }
        /* Compute intermediate results, which are similar to RC's */
        if (fabsf(v[j][j]) < 1.0e-10f)
        {
            for (i = j;  i < order;  i++)
                rc[i] = 0.0f;
            return;
        }
        rc[j] = psi[j];
        for (k = 0;  k < j;  k++)
            rc[j] -= rc[k]*v[k][j];
        v[j][j] = 1.0f/v[j][j];
        rc[j] *= v[j][j];
        /* Clamp to +/- 0.999 */
        r1 = (rc[j] <= 0.999f)  ?  rc[j]  :  0.999f;
        rc[j] = (r1 >= -0.999f)  ?  r1  :  -0.999f;
    }
}
464 /*- End of function --------------------------------------------------------*/ | |
465 | |
/* Check RC's, repeat previous frame's RC's if unstable.
   If any of the order coefficients in rc2f[] has a magnitude above 0.99,
   the whole of rc2f[] is overwritten with rc1f[]. Always returns 0. */
static int rcchk(int order, float rc1f[], float rc2f[])
{
    int unstable;
    int k;

    /* Scan for the first out of range coefficient */
    unstable = 0;
    for (k = 0;  k < order  &&  !unstable;  k++)
        unstable = (fabsf(rc2f[k]) > 0.99f);

    if (unstable)
    {
        for (k = 0;  k < order;  k++)
            rc2f[k] = rc1f[k];
    }
    return 0;
}
482 /*- End of function --------------------------------------------------------*/ | |
483 | |
/* Low pass filter the last nsamp samples of inbuf[0..len-1] into the same
   positions of lpbuf[]. Each output reads the 31 inputs ending at its own
   position, so inbuf[] must hold 30 samples before the first one filtered.

   31 point equiripple FIR LPF
   Linear phase, delay = 15 samples
   Passband: ripple = 0.25 dB, cutoff = 800 Hz
   Stopband: atten. = 40. dB, cutoff = 1240 Hz */
static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp)
{
    /* The filter is symmetric: tap k is shared by the samples k and 30 - k
       back from the output position. The centre tap stands alone. */
    static const float outer_taps[15] =
    {
        -0.0097201988f,
        -0.0105179986f,
        -0.0083479648f,
        5.860774e-4f,
        0.0130892089f,
        0.0217052232f,
        0.0184161253f,
        3.39723e-4f,
        -0.0260797087f,
        -0.0455563702f,
        -0.040306855f,
        5.029835e-4f,
        0.0729262903f,
        0.1572008878f,
        0.2247288674f
    };
    static const float centre_tap = 0.250535965f;
    int32_t n;
    int32_t k;
    float acc;

    for (n = len - nsamp;  n < len;  n++)
    {
        acc = (inbuf[n] + inbuf[n - 30])*outer_taps[0];
        for (k = 1;  k < 15;  k++)
            acc += (inbuf[n - k] + inbuf[n - 30 + k])*outer_taps[k];
        acc += inbuf[n - 15]*centre_tap;
        lpbuf[n] = acc;
    }
}
515 /*- End of function --------------------------------------------------------*/ | |
516 | |
/* 2nd order inverse filter, speech is decimated 4:1.

   lpbuf  Low pass filtered input, len samples.
   ivbuf  Output; the last nsamp positions (len - nsamp .. len - 1) are written.
   len    Length of the buffers.
   nsamp  Number of samples to filter.
   ivrc   Output: the two coefficients derived from the autocorrelations,
          or zeros when the lag 0 autocorrelation is not above 1e-10. */
static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[])
{
    float acf[3];
    float pc1;
    float pc2;
    int32_t lag_idx;
    int32_t lag;
    int32_t n;

    /* Calculate autocorrelations at lags 0, 4 and 8, using every other sample */
    for (lag_idx = 0;  lag_idx < 3;  lag_idx++)
    {
        lag = lag_idx << 2;
        acf[lag_idx] = 0.0f;
        for (n = ((lag_idx + 1) << 2) + len - nsamp;  n <= len;  n += 2)
            acf[lag_idx] += lpbuf[n - 1]*lpbuf[n - lag - 1];
    }

    /* Calculate predictor coefficients; they stay at zero for a silent input */
    pc1 = 0.0f;
    pc2 = 0.0f;
    ivrc[0] = 0.0f;
    ivrc[1] = 0.0f;
    if (acf[0] > 1.0e-10f)
    {
        ivrc[0] = acf[1]/acf[0];
        ivrc[1] = (acf[2] - ivrc[0]*acf[1])/(acf[0] - ivrc[0]*acf[1]);
        pc1 = ivrc[0] - ivrc[0]*ivrc[1];
        pc2 = ivrc[1];
    }

    /* Inverse filter LPBUF into IVBUF */
    for (n = len - nsamp;  n < len;  n++)
        ivbuf[n] = lpbuf[n] - pc1*lpbuf[n - 4] - pc2*lpbuf[n - 8];
}
551 /*- End of function --------------------------------------------------------*/ | |
552 | |
553 void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[]) | |
554 { | |
555 static const int32_t tau[60] = | |
556 { | |
557 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | |
558 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, | |
559 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96, | |
560 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, | |
561 148, 152, 156 | |
562 }; | |
563 static const int32_t buflim[4] = | |
564 { | |
565 181, 720, 25, 720 | |
566 }; | |
567 static const float precoef = 0.9375f; | |
568 | |
569 float amdf[60]; | |
570 float abuf[156]; | |
571 float ivrc[2]; | |
572 float temp; | |
573 float phi[100] /* was [10][10] */; | |
574 float psi[10]; | |
575 int32_t half; | |
576 int32_t midx; | |
577 int32_t ewin[3][2]; | |
578 int32_t i; | |
579 int32_t j; | |
580 int32_t lanal; | |
581 int32_t ipitch; | |
582 int32_t mintau; | |
583 int32_t minptr; | |
584 int32_t maxptr; | |
585 | |
586 /* Calculations are done on future frame due to requirements | |
587 of the pitch tracker. Delay RMS and RC's 2 frames to give | |
588 current frame parameters on return. */ | |
589 | |
590 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 181; i++) | |
591 { | |
592 s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
593 s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i]; | |
594 } | |
595 for (i = 0; i <= 540 - LPC10_SAMPLES_PER_FRAME - 229; i++) | |
596 s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
597 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 25; i++) | |
598 s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
599 for (i = 0, j = 0; i < s->osptr - 1; i++) | |
600 { | |
601 if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME) | |
602 s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME; | |
603 } | |
604 s->osptr = j + 1; | |
605 s->voibuf[0][0] = s->voibuf[1][0]; | |
606 s->voibuf[0][1] = s->voibuf[1][1]; | |
607 for (i = 0; i < 2; i++) | |
608 { | |
609 s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME; | |
610 s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME; | |
611 s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME; | |
612 s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME; | |
613 s->obound[i] = s->obound[i + 1]; | |
614 s->voibuf[i + 1][0] = s->voibuf[i + 2][0]; | |
615 s->voibuf[i + 1][1] = s->voibuf[i + 2][1]; | |
616 s->rmsbuf[i] = s->rmsbuf[i + 1]; | |
617 for (j = 0; j < LPC10_ORDER; j++) | |
618 s->rcbuf[i][j] = s->rcbuf[i + 1][j]; | |
619 } | |
620 /* If the average value in the frame was over 1/4096 (after current | |
621 BIAS correction), then subtract that much more from samples in the | |
622 next frame. If the average value in the frame was under | |
623 -1/4096, add 1/4096 more to samples in next frame. In all other | |
624 cases, keep BIAS the same. */ | |
625 temp = 0.0f; | |
626 for (i = 0; i < LPC10_SAMPLES_PER_FRAME; i++) | |
627 { | |
628 s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias; | |
629 temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i]; | |
630 } | |
631 if (temp > (float) LPC10_SAMPLES_PER_FRAME) | |
632 s->bias++; | |
633 else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME)) | |
634 s->bias--; | |
635 /* Place voicing window */ | |
636 i = 721 - LPC10_SAMPLES_PER_FRAME; | |
637 s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre); | |
638 onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME); | |
639 | |
640 lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462); | |
641 /* The Pitch Extraction algorithm estimates the pitch for a frame | |
642 of speech by locating the minimum of the average magnitude difference | |
643 function (AMDF). The AMDF operates on low-pass, inverse filtered | |
644 speech. (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR | |
645 filter and the inverse filter is a 2nd-order LPC filter.) The pitch | |
646 estimate is later refined by dynamic tracking. However, since some | |
647 of the tracking parameters are a function of the voicing decisions, | |
648 a voicing decision must precede the final pitch estimation. */ | |
649 /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */ | |
650 /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720 | |
651 of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH | |
652 = 720 of LPBUF. */ | |
653 lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME); | |
654 /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540 | |
655 of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through | |
656 PWINH = 540 of IVBUF. */ | |
657 ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc); | |
658 /* eval_highres_amdf reads indices PWINL = 229 through | |
659 (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes | |
660 indices 1 through LTAU = 60 of AMDF. */ | |
661 eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau); | |
662 /* Voicing decisions are made for each half frame of input speech. | |
663 An initial voicing classification is made for each half of the | |
664 analysis frame, and the voicing decisions for the present frame | |
665 are finalized. See subroutine VOICIN. */ | |
666 /* The voicing detector (VOICIN) classifies the input signal as | |
667 unvoiced (including silence) or voiced using the AMDF windowed | |
668 maximum-to-minimum ratio, the zero crossing rate, energy measures, | |
669 reflection coefficients, and prediction gains. */ | |
670 /* The pitch and voicing rules apply smoothing and isolated | |
671 corrections to the pitch and voicing estimates and, in the process, | |
672 introduce two frames of delay into the corrected pitch estimates and | |
673 voicing decisions. */ | |
674 for (half = 0; half < 2; half++) | |
675 { | |
676 lpc10_voicing(s, | |
677 &s->vwin[2][0], | |
678 s->inbuf, | |
679 s->lpbuf, | |
680 buflim, | |
681 half, | |
682 &amdf[minptr], | |
683 &amdf[maxptr], | |
684 &mintau, | |
685 ivrc, | |
686 s->obound); | |
687 } | |
688 /* Find the minimum cost pitch decision over several frames, | |
689 given the current voicing decision and the AMDF array */ | |
690 minptr++; | |
691 dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx); | |
692 ipitch = tau[midx - 1]; | |
693 /* Place spectrum analysis and energy windows */ | |
694 lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156); | |
695 /* Remove short term DC bias over the analysis window. */ | |
696 lanal = s->awin[2][1] + 1 - s->awin[2][0]; | |
697 remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf); | |
698 /* Compute RMS over integer number of pitch periods within the analysis window. */ | |
699 /* Note that in a hardware implementation this computation may be | |
700 simplified by using diagonal elements of phi computed by mload(). */ | |
701 s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1); | |
702 /* Matrix load and invert, check RC's for stability */ | |
703 mload(LPC10_ORDER, 1, lanal, abuf, phi, psi); | |
704 invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]); | |
705 rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]); | |
706 /* Set return parameters */ | |
707 voice[0] = s->voibuf[1][0]; | |
708 voice[1] = s->voibuf[1][1]; | |
709 *rms = s->rmsbuf[0]; | |
710 for (i = 0; i < LPC10_ORDER; i++) | |
711 rc[i] = s->rcbuf[0][i]; | |
712 } | |
713 /*- End of function --------------------------------------------------------*/ | |
714 /*- End of file ------------------------------------------------------------*/ |