Mercurial > hg > audiostuff
comparison spandsp-0.0.6pre17/src/lpc10_analyse.c @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
| author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
|---|---|
| date | Fri, 25 Jun 2010 15:50:58 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:c6c5a16ce2f2 | 4:26cd8f1ef0b1 |
|---|---|
| 1 /* | |
| 2 * SpanDSP - a series of DSP components for telephony | |
| 3 * | |
| 4 * lpc10_analyse.c - LPC10 low bit rate speech codec. | |
| 5 * | |
| 6 * Written by Steve Underwood <steveu@coppice.org> | |
| 7 * | |
| 8 * Copyright (C) 2006 Steve Underwood | |
| 9 * | |
| 10 * All rights reserved. | |
| 11 * | |
| 12 * This program is free software; you can redistribute it and/or modify | |
| 13 * it under the terms of the GNU Lesser General Public License version 2.1, | |
| 14 * as published by the Free Software Foundation. | |
| 15 * | |
| 16 * This program is distributed in the hope that it will be useful, | |
| 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 19 * GNU Lesser General Public License for more details. | |
| 20 * | |
| 21 * You should have received a copy of the GNU Lesser General Public | |
| 22 * License along with this program; if not, write to the Free Software | |
| 23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
| 24 * | |
| 25 * This code is based on the U.S. Department of Defense reference | |
| 26 * implementation of the LPC-10 2400 bps Voice Coder. They do not | |
| 27 * exert copyright claims on their code, and it may be freely used. | |
| 28 * | |
| 29 * $Id: lpc10_analyse.c,v 1.22 2009/01/28 03:41:27 steveu Exp $ | |
| 30 */ | |
| 31 | |
| 32 #if defined(HAVE_CONFIG_H) | |
| 33 #include "config.h" | |
| 34 #endif | |
| 35 | |
| 36 #include <stdlib.h> | |
| 37 #include <stdio.h> | |
| 38 #include <inttypes.h> | |
| 39 #include <memory.h> | |
| 40 #if defined(HAVE_TGMATH_H) | |
| 41 #include <tgmath.h> | |
| 42 #endif | |
| 43 #if defined(HAVE_MATH_H) | |
| 44 #include <math.h> | |
| 45 #endif | |
| 46 #include "floating_fudge.h" | |
| 47 | |
| 48 #include "spandsp/telephony.h" | |
| 49 #include "spandsp/dc_restore.h" | |
| 50 #include "spandsp/lpc10.h" | |
| 51 #include "spandsp/private/lpc10.h" | |
| 52 | |
| 53 #include "lpc10_encdecs.h" | |
| 54 | |
| 55 static __inline__ float energyf(float amp[], int len) | |
| 56 { | |
| 57 int i; | |
| 58 float rms; | |
| 59 | |
| 60 rms = 0.0f; | |
| 61 for (i = 0; i < len; i++) | |
| 62 rms += amp[i]*amp[i]; | |
| 63 rms = sqrtf(rms/len); | |
| 64 return rms; | |
| 65 } | |
| 66 /*- End of function --------------------------------------------------------*/ | |
| 67 | |
/* Subtract the mean of speech[0..len-1] from every sample, storing the
   result in sigout[0..len-1]. The mean is computed over the whole block
   before any output sample is written. */
static void remove_dc_bias(float speech[], int len, float sigout[])
{
    float mean;
    int n;

    /* Pass 1: average of the block */
    mean = 0.0f;
    n = 0;
    while (n < len)
    {
        mean += speech[n];
        n++;
    }
    mean /= len;

    /* Pass 2: remove the average */
    n = 0;
    while (n < len)
    {
        sigout[n] = speech[n] - mean;
        n++;
    }
}
| 80 /*- End of function --------------------------------------------------------*/ | |
| 81 | |
/* Evaluate the average magnitude difference function (AMDF) of speech[]
   for each of the ltau lags listed in tau[].

   For each lag a window of lpita samples is examined, taking every 4th
   sample. The window start moves with the lag as (maxlag - lag)/2.
   amdf[k] receives the sum of |speech[n] - speech[n + lag]| for lag
   tau[k]. On return *minptr and *maxptr hold the 0-based indices of the
   smallest and largest amdf[] entries (the first one found, on ties). */
static void eval_amdf(float speech[],
                      int32_t lpita,
                      const int32_t tau[],
                      int32_t ltau,
                      int32_t maxlag,
                      float amdf[],
                      int32_t *minptr,
                      int32_t *maxptr)
{
    float acc;
    int32_t lag;
    int k;
    int pos;
    int first;
    int past_end;

    *minptr = 0;
    *maxptr = 0;
    k = 0;
    while (k < ltau)
    {
        lag = tau[k];
        /* 0-based bounds of the window for this lag */
        first = (maxlag - lag)/2;
        past_end = first + lpita;
        acc = 0.0f;
        for (pos = first;  pos < past_end;  pos += 4)
            acc += fabsf(speech[pos] - speech[pos + lag]);
        amdf[k] = acc;
        if (amdf[k] < amdf[*minptr])
            *minptr = k;
        if (amdf[k] > amdf[*maxptr])
            *maxptr = k;
        k++;
    }
}
| 113 /*- End of function --------------------------------------------------------*/ | |
| 114 | |
/* Find the AMDF minimum with single sample lag resolution.

   The AMDF is first evaluated at the coarse lags in tau[]. Lags close to
   the coarse minimum that are not in tau[], and then lags around half of
   the best lag (one octave up), are evaluated as well.

   On return:
     amdf[]   holds the coarse AMDF, with the entry at *minptr overwritten
              by the (integer truncated) high resolution minimum;
     *minptr  is the 0-based index into amdf[] for the minimum (moved down
              by 20 entries when the octave-up candidate wins);
     *maxptr  is the index of the largest amdf[] entry within 5 entries
              either side of *minptr;
     *mintau  is the lag, in samples, of the best minimum found. */
static void eval_highres_amdf(float speech[],
                              int32_t lpita,
                              const int32_t tau[],
                              int32_t ltau,
                              float amdf[],
                              int32_t *minptr,
                              int32_t *maxptr,
                              int32_t *mintau)
{
    const int32_t longest_lag = tau[ltau - 1];
    float fine_amdf[6];
    int32_t fine_lags[6];
    int32_t fine_count;
    int32_t fine_min;
    int32_t fine_max;
    int32_t best_amdf;
    int lag;
    int limit;
    int scan;
    int half_lag;

    /* Coarse pass over the full lag table */
    eval_amdf(speech, lpita, tau, ltau, longest_lag, amdf, minptr, maxptr);
    *mintau = tau[*minptr];
    best_amdf = (int32_t) amdf[*minptr];

    /* Collect the lags within +/- 3 of the coarse minimum (never below 41,
       never reaching the longest lag) which the coarse pass skipped. */
    fine_count = 0;
    scan = *minptr - 2;
    limit = min(*mintau + 4, longest_lag);
    lag = max(*mintau - 3, 41);
    while (lag < limit)
    {
        while (tau[scan] < lag)
            scan++;
        if (tau[scan] != lag)
            fine_lags[fine_count++] = lag;
        lag++;
    }
    /* Take one of the new lags if it beats the coarse minimum */
    if (fine_count > 0)
    {
        eval_amdf(speech, lpita, fine_lags, fine_count, longest_lag, fine_amdf, &fine_min, &fine_max);
        if (fine_amdf[fine_min] < (float) best_amdf)
        {
            *mintau = fine_lags[fine_min];
            best_amdf = (int32_t) fine_amdf[fine_min];
        }
    }
    /* Look one octave up. Odd half-lags are tried directly; for an even
       half-lag its two odd neighbours are tried instead. */
    if (*mintau >= 80)
    {
        half_lag = *mintau/2;
        if (half_lag & 1)
        {
            fine_lags[0] = half_lag;
            fine_count = 1;
        }
        else
        {
            fine_lags[0] = half_lag - 1;
            fine_lags[1] = half_lag + 1;
            fine_count = 2;
        }
        eval_amdf(speech, lpita, fine_lags, fine_count, longest_lag, fine_amdf, &fine_min, &fine_max);
        if (fine_amdf[fine_min] < (float) best_amdf)
        {
            *mintau = fine_lags[fine_min];
            best_amdf = (int32_t) fine_amdf[fine_min];
            *minptr -= 20;
        }
    }
    /* Force the reported minimum of the AMDF array to the refined value */
    amdf[*minptr] = (float) best_amdf;
    /* Find the AMDF maximum in the neighbourhood of the minimum */
    *maxptr = max(*minptr - 5, 0);
    limit = min(*minptr + 6, ltau);
    for (scan = *maxptr;  scan < limit;  scan++)
    {
        if (amdf[scan] > amdf[*maxptr])
            *maxptr = scan;
    }
}
| 196 /*- End of function --------------------------------------------------------*/ | |
| 197 | |
| 198 static void dynamic_pitch_tracking(lpc10_encode_state_t *s, | |
| 199 float amdf[], | |
| 200 int32_t ltau, | |
| 201 int32_t *minptr, | |
| 202 int32_t voice, | |
| 203 int32_t *pitch, | |
| 204 int32_t *midx) | |
| 205 { | |
| 206 int32_t pbar; | |
| 207 float sbar; | |
| 208 int32_t path[2]; | |
| 209 int32_t i; | |
| 210 int32_t j; | |
| 211 float alpha; | |
| 212 float minsc; | |
| 213 float maxsc; | |
| 214 | |
| 215 /* Calculate the confidence factor ALPHA, used as a threshold slope in */ | |
| 216 /* SEESAW. If unvoiced, set high slope so that every point in P array */ | |
| 217 /*is marked as a potential pitch frequency. A scaled up version (ALPHAX )*/ | |
| 218 /* is used to maintain arithmetic precision. */ | |
| 219 if (voice == 1) | |
| 220 s->alphax = s->alphax*0.75f + amdf[*minptr - 1]*0.5f; | |
| 221 else | |
| 222 s->alphax *= 0.984375f; | |
| 223 alpha = s->alphax/16; | |
| 224 if (voice == 0 && s->alphax < 128.0f) | |
| 225 alpha = 8.0f; | |
| 226 /* SEESAW: Construct a pitch pointer array and intermediate winner function */ | |
| 227 /* Left to right pass: */ | |
| 228 s->p[s->ipoint][0] = 1; | |
| 229 pbar = 1; | |
| 230 sbar = s->s[0]; | |
| 231 for (i = 0; i < ltau; i++) | |
| 232 { | |
| 233 sbar += alpha; | |
| 234 if (sbar < s->s[i]) | |
| 235 { | |
| 236 s->s[i] = sbar; | |
| 237 } | |
| 238 else | |
| 239 { | |
| 240 pbar = i + 1; | |
| 241 sbar = s->s[i]; | |
| 242 } | |
| 243 s->p[s->ipoint][i] = pbar; | |
| 244 } | |
| 245 /* Right to left pass: */ | |
| 246 sbar = s->s[pbar - 1]; | |
| 247 for (i = pbar - 2; i >= 0; i--) | |
| 248 { | |
| 249 sbar += alpha; | |
| 250 if (sbar < s->s[i]) | |
| 251 { | |
| 252 s->s[i] = sbar; | |
| 253 s->p[s->ipoint][i] = pbar; | |
| 254 } | |
| 255 else | |
| 256 { | |
| 257 pbar = s->p[s->ipoint][i]; | |
| 258 i = pbar - 1; | |
| 259 sbar = s->s[i]; | |
| 260 } | |
| 261 } | |
| 262 /* Update S using AMDF */ | |
| 263 /* Find maximum, minimum, and location of minimum */ | |
| 264 s->s[0] += amdf[0]/2; | |
| 265 minsc = s->s[0]; | |
| 266 maxsc = minsc; | |
| 267 *midx = 1; | |
| 268 for (i = 1; i < ltau; i++) | |
| 269 { | |
| 270 s->s[i] += amdf[i]/2; | |
| 271 if (s->s[i] > maxsc) | |
| 272 maxsc = s->s[i]; | |
| 273 if (s->s[i] < minsc) | |
| 274 { | |
| 275 *midx = i + 1; | |
| 276 minsc = s->s[i]; | |
| 277 } | |
| 278 } | |
| 279 /* Subtract MINSC from S to prevent overflow */ | |
| 280 for (i = 0; i < ltau; i++) | |
| 281 s->s[i] -= minsc; | |
| 282 maxsc -= minsc; | |
| 283 /* Use higher octave pitch if significant null there */ | |
| 284 j = 0; | |
| 285 for (i = 20; i <= 40; i += 10) | |
| 286 { | |
| 287 if (*midx > i) | |
| 288 { | |
| 289 if (s->s[*midx - i - 1] < maxsc / 4) | |
| 290 j = i; | |
| 291 } | |
| 292 } | |
| 293 *midx -= j; | |
| 294 /* TRACE: look back two frames to find minimum cost pitch estimate */ | |
| 295 *pitch = *midx; | |
| 296 for (i = 0, j = s->ipoint; i < 2; i++, j++) | |
| 297 { | |
| 298 *pitch = s->p[j & 1][*pitch - 1]; | |
| 299 path[i] = *pitch; | |
| 300 } | |
| 301 | |
| 302 /* The following statement subtracts one from IPOINT, mod DEPTH. I */ | |
| 303 /* think the author chose to add DEPTH-1, instead of subtracting 1, */ | |
| 304 /* because then it will work even if MOD doesn't work as desired on */ | |
| 305 /* negative arguments. */ | |
| 306 s->ipoint = (s->ipoint + 1) & 1; | |
| 307 } | |
| 308 /*- End of function --------------------------------------------------------*/ | |
| 309 | |
| 310 /* Detection of onsets in (or slightly preceding) the futuremost frame of speech. */ | |
| 311 static void onset(lpc10_encode_state_t *s, | |
| 312 float *pebuf, | |
| 313 int32_t osbuf[], | |
| 314 int32_t *osptr, | |
| 315 int32_t oslen, | |
| 316 int32_t sbufl, | |
| 317 int32_t sbufh, | |
| 318 int32_t lframe) | |
| 319 { | |
| 320 int32_t i; | |
| 321 float r1; | |
| 322 float l2sum2; | |
| 323 | |
| 324 pebuf -= sbufl; | |
| 325 | |
| 326 if (s->hyst) | |
| 327 s->lasti -= lframe; | |
| 328 for (i = sbufh - lframe + 1; i <= sbufh; i++) | |
| 329 { | |
| 330 /* Compute FPC; Use old FPC on divide by zero; Clamp FPC to +/- 1. */ | |
| 331 s->n = (pebuf[i]*pebuf[i - 1] + s->n*63.0f)/64.0f; | |
| 332 /* Computing 2nd power */ | |
| 333 r1 = pebuf[i - 1]; | |
| 334 s->d__ = (r1*r1 + s->d__*63.0f)/64.0f; | |
| 335 if (s->d__ != 0.0f) | |
| 336 { | |
| 337 if (fabsf(s->n) > s->d__) | |
| 338 s->fpc = r_sign(1.0f, s->n); | |
| 339 else | |
| 340 s->fpc = s->n/s->d__; | |
| 341 } | |
| 342 /* Filter FPC */ | |
| 343 l2sum2 = s->l2buf[s->l2ptr1 - 1]; | |
| 344 s->l2sum1 = s->l2sum1 - s->l2buf[s->l2ptr2 - 1] + s->fpc; | |
| 345 s->l2buf[s->l2ptr2 - 1] = s->l2sum1; | |
| 346 s->l2buf[s->l2ptr1 - 1] = s->fpc; | |
| 347 s->l2ptr1 = (s->l2ptr1 & 0xF) + 1; | |
| 348 s->l2ptr2 = (s->l2ptr2 & 0xF) + 1; | |
| 349 if (fabsf(s->l2sum1 - l2sum2) > 1.7f) | |
| 350 { | |
| 351 if (!s->hyst) | |
| 352 { | |
| 353 /* Ignore if buffer full */ | |
| 354 if (*osptr <= oslen) | |
| 355 { | |
| 356 osbuf[*osptr - 1] = i - 9; | |
| 357 (*osptr)++; | |
| 358 } | |
| 359 s->hyst = TRUE; | |
| 360 } | |
| 361 s->lasti = i; | |
| 362 /* After one onset detection, at least OSHYST sample times must go */ | |
| 363 /* by before another is allowed to occur. */ | |
| 364 } | |
| 365 else if (s->hyst && i - s->lasti >= 10) | |
| 366 { | |
| 367 s->hyst = FALSE; | |
| 368 } | |
| 369 } | |
| 370 } | |
| 371 /*- End of function --------------------------------------------------------*/ | |
| 372 | |
/* Load a covariance matrix.

   The analysis window runs from 1-based sample awins to awinf of
   speech[]. phi[] is an order x order matrix stored as phi[c*order + r],
   and only the entries with c <= r are written. psi[] is the order
   element right hand side vector. The first column of phi and the last
   element of psi are accumulated directly. Everything else is derived
   from those by adding and subtracting the products at the window ends. */
static void mload(int32_t order, int32_t awins, int32_t awinf, float speech[], float phi[], float psi[])
{
    int32_t start;
    int32_t head;
    int32_t tail;
    int32_t last;
    int row;
    int col;
    int n;

    start = awins + order;
    head = start - 2;       /* 0-based index of the first sample summed */
    tail = awinf - 1;       /* 0-based index of the last sample in the window */
    last = order - 1;

    /* Load the first column of PHI */
    for (row = 0;  row < order;  row++)
    {
        phi[row] = 0.0f;
        for (n = head;  n < tail;  n++)
            phi[row] += speech[n]*speech[n - row];
    }

    /* Load the last element of vector PSI */
    psi[last] = 0.0f;
    for (n = head + 1;  n <= tail;  n++)
        psi[last] += speech[n]*speech[n - order];

    /* End correct to get the additional columns of PHI */
    for (row = 1;  row < order;  row++)
    {
        for (col = 1;  col <= row;  col++)
        {
            phi[col*order + row] = phi[(col - 1)*order + row - 1]
                                 - speech[tail - row]*speech[tail - col]
                                 + speech[head - row]*speech[head - col];
        }
    }

    /* End correct to get the additional elements of PSI */
    for (n = 0;  n < last;  n++)
    {
        psi[n] = phi[n + 1]
               - speech[head]*speech[head - n - 1]
               + speech[tail]*speech[tail - n - 1];
    }
}
| 410 /*- End of function --------------------------------------------------------*/ | |
| 411 | |
/* Preemphasize speech with a single-zero filter:
       pebuf[n] = inbuf[n] - coeff*inbuf[n - 1]
   z supplies the input sample preceding inbuf[0]. The last input sample
   processed is returned, for use as z on the next call. When nsamp is
   not positive, z is returned unchanged.
   (When coeff = .9375, preemphasis is as in LPC43.) */
static float preemp(float inbuf[], float pebuf[], int nsamp, float coeff, float z)
{
    float x;
    int n;

    n = 0;
    while (n < nsamp)
    {
        /* Fetch the input before storing the output, so the two buffers
           may be the same array. */
        x = inbuf[n];
        pebuf[n] = x - coeff*z;
        z = x;
        n++;
    }
    return z;
}
| 427 /*- End of function --------------------------------------------------------*/ | |
| 428 | |
/* Invert a covariance matrix using Choleski decomposition method.

   \param order The order of the system. The working matrix is a fixed
                10 x 10 array, so order must not exceed 10.
   \param phi   The covariance matrix, stored as phi[c*order + r]. Only
                the entries with c <= r are read.
   \param psi   The right hand side vector, of order elements.
   \param rc    Receives order results, each limited to +/- 0.999. If a
                pivot with a magnitude below 1.0e-10 is met, that result
                and all the following ones are set to zero. */
static void invert(int32_t order, const float phi[], const float psi[], float rc[])
{
    float r1;
    int32_t i;
    int32_t j;
    int32_t k;
    float v[10][10];

    for (j = 0;  j < order;  j++)
    {
        for (i = j;  i < order;  i++)
            v[j][i] = phi[i + j*order];
        for (k = 0;  k < j;  k++)
        {
            r1 = v[k][j]*v[k][k];
            /* Only columns j..order-1 exist. Running this loop up to and
               including column `order` would read uninitialised elements,
               and for the last row would write past the end of v[][]. */
            for (i = j;  i < order;  i++)
                v[j][i] -= v[k][i]*r1;
        }
        /* Compute intermediate results, which are similar to RC's */
        if (fabsf(v[j][j]) < 1.0e-10f)
        {
            /* Ill conditioned: zero this and all the remaining results */
            for (i = j;  i < order;  i++)
                rc[i] = 0.0f;
            return;
        }
        rc[j] = psi[j];
        for (k = 0;  k < j;  k++)
            rc[j] -= rc[k]*v[k][j];
        /* The diagonal holds the reciprocal of the pivot from here on */
        v[j][j] = 1.0f/v[j][j];
        rc[j] *= v[j][j];
        /* Limit the result to +/- 0.999 */
        r1 = (rc[j] <= 0.999f)  ?  rc[j]  :  0.999f;
        rc[j] = (r1 >= -0.999f)  ?  r1  :  -0.999f;
    }
}
| 464 /*- End of function --------------------------------------------------------*/ | |
| 465 | |
/* Check RC's, repeat previous frame's RC's if unstable.
   If any of the order coefficients in rc2f[] has a magnitude above 0.99,
   the whole of rc2f[] is replaced by rc1f[]. Always returns 0. */
static int rcchk(int order, float rc1f[], float rc2f[])
{
    int idx;
    int unstable;

    /* Scan for the first out of range coefficient */
    unstable = 0;
    for (idx = 0;  idx < order  &&  !unstable;  idx++)
        unstable = (fabsf(rc2f[idx]) > 0.99f);

    if (unstable)
    {
        for (idx = 0;  idx < order;  idx++)
            rc2f[idx] = rc1f[idx];
    }
    return 0;
}
| 482 /*- End of function --------------------------------------------------------*/ | |
| 483 | |
/* Low pass filter the newest nsamp samples of inbuf[] into lpbuf[].

   31 point equiripple FIR LPF
   Linear phase, delay = 15 samples
   Passband:  ripple = 0.25 dB, cutoff =  800 Hz
   Stopband:  atten. =  40. dB, cutoff = 1240 Hz

   Outputs lpbuf[len - nsamp] .. lpbuf[len - 1] are written. Each output
   lpbuf[j] is formed from inbuf[j - 30] .. inbuf[j], so 30 samples of
   history must be available before the first output position. */
static void lpfilt(float inbuf[], float lpbuf[], int32_t len, int32_t nsamp)
{
    /* One half of the symmetric impulse response, outermost tap first.
       The centre tap is kept separately. */
    static const float taps[15] =
    {
        -0.0097201988f,
        -0.0105179986f,
        -0.0083479648f,
        5.860774e-4f,
        0.0130892089f,
        0.0217052232f,
        0.0184161253f,
        3.39723e-4f,
        -0.0260797087f,
        -0.0455563702f,
        -0.040306855f,
        5.029835e-4f,
        0.0729262903f,
        0.1572008878f,
        0.2247288674f
    };
    static const float centre_tap = 0.250535965f;
    float acc;
    int32_t out;
    int32_t k;

    for (out = len - nsamp;  out < len;  out++)
    {
        /* Pair up the samples which share a coefficient, working from the
           outside of the impulse response towards the centre. */
        acc = (inbuf[out] + inbuf[out - 30])*taps[0];
        for (k = 1;  k < 15;  k++)
            acc += (inbuf[out - k] + inbuf[out - 30 + k])*taps[k];
        acc += inbuf[out - 15]*centre_tap;
        lpbuf[out] = acc;
    }
}
| 515 /*- End of function --------------------------------------------------------*/ | |
| 516 | |
/* 2nd order inverse filter, speech is decimated 4:1.

   Autocorrelations of lpbuf[] at lags of 0, 4 and 8 samples are formed
   over the newest part of the buffer, using every second sample. Two
   reflection coefficients are derived from these and returned in
   ivrc[0..1]. Both are zero when the lag 0 term is not above 1.0e-10.
   ivbuf[len - nsamp] .. ivbuf[len - 1] then receive lpbuf[] filtered by
   the matching predictor, which reads back as far as lpbuf[n - 8]. */
static void ivfilt(float lpbuf[], float ivbuf[], int32_t len, int32_t nsamp, float ivrc[])
{
    float acorr[3];
    float a1;
    float a2;
    int32_t first;
    int32_t lag;
    int32_t m;
    int32_t n;

    first = len - nsamp;

    /* Calculate autocorrelations */
    for (m = 0;  m < 3;  m++)
    {
        lag = 4*m;
        acorr[m] = 0.0f;
        for (n = first + lag + 3;  n < len;  n += 2)
            acorr[m] += lpbuf[n]*lpbuf[n - lag];
    }

    /* Calculate predictor coefficients */
    a1 = 0.0f;
    a2 = 0.0f;
    ivrc[0] = 0.0f;
    ivrc[1] = 0.0f;
    if (acorr[0] > 1.0e-10f)
    {
        ivrc[0] = acorr[1]/acorr[0];
        ivrc[1] = (acorr[2] - ivrc[0]*acorr[1])/(acorr[0] - ivrc[0]*acorr[1]);
        a1 = ivrc[0] - ivrc[0]*ivrc[1];
        a2 = ivrc[1];
    }

    /* Inverse filter LPBUF into IVBUF */
    for (n = first;  n < len;  n++)
        ivbuf[n] = lpbuf[n] - a1*lpbuf[n - 4] - a2*lpbuf[n - 8];
}
| 551 /*- End of function --------------------------------------------------------*/ | |
| 552 | |
| 553 void lpc10_analyse(lpc10_encode_state_t *s, float speech[], int32_t voice[], int32_t *pitch, float *rms, float rc[]) | |
| 554 { | |
| 555 static const int32_t tau[60] = | |
| 556 { | |
| 557 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | |
| 558 35, 36, 37, 38, 39, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, | |
| 559 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 84, 88, 92, 96, | |
| 560 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, | |
| 561 148, 152, 156 | |
| 562 }; | |
| 563 static const int32_t buflim[4] = | |
| 564 { | |
| 565 181, 720, 25, 720 | |
| 566 }; | |
| 567 static const float precoef = 0.9375f; | |
| 568 | |
| 569 float amdf[60]; | |
| 570 float abuf[156]; | |
| 571 float ivrc[2]; | |
| 572 float temp; | |
| 573 float phi[100] /* was [10][10] */; | |
| 574 float psi[10]; | |
| 575 int32_t half; | |
| 576 int32_t midx; | |
| 577 int32_t ewin[3][2]; | |
| 578 int32_t i; | |
| 579 int32_t j; | |
| 580 int32_t lanal; | |
| 581 int32_t ipitch; | |
| 582 int32_t mintau; | |
| 583 int32_t minptr; | |
| 584 int32_t maxptr; | |
| 585 | |
| 586 /* Calculations are done on future frame due to requirements | |
| 587 of the pitch tracker. Delay RMS and RC's 2 frames to give | |
| 588 current frame parameters on return. */ | |
| 589 | |
| 590 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 181; i++) | |
| 591 { | |
| 592 s->inbuf[i] = s->inbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
| 593 s->pebuf[i] = s->pebuf[LPC10_SAMPLES_PER_FRAME + i]; | |
| 594 } | |
| 595 for (i = 0; i <= 540 - LPC10_SAMPLES_PER_FRAME - 229; i++) | |
| 596 s->ivbuf[i] = s->ivbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
| 597 for (i = 0; i <= 720 - LPC10_SAMPLES_PER_FRAME - 25; i++) | |
| 598 s->lpbuf[i] = s->lpbuf[LPC10_SAMPLES_PER_FRAME + i]; | |
| 599 for (i = 0, j = 0; i < s->osptr - 1; i++) | |
| 600 { | |
| 601 if (s->osbuf[i] > LPC10_SAMPLES_PER_FRAME) | |
| 602 s->osbuf[j++] = s->osbuf[i] - LPC10_SAMPLES_PER_FRAME; | |
| 603 } | |
| 604 s->osptr = j + 1; | |
| 605 s->voibuf[0][0] = s->voibuf[1][0]; | |
| 606 s->voibuf[0][1] = s->voibuf[1][1]; | |
| 607 for (i = 0; i < 2; i++) | |
| 608 { | |
| 609 s->vwin[i][0] = s->vwin[i + 1][0] - LPC10_SAMPLES_PER_FRAME; | |
| 610 s->vwin[i][1] = s->vwin[i + 1][1] - LPC10_SAMPLES_PER_FRAME; | |
| 611 s->awin[i][0] = s->awin[i + 1][0] - LPC10_SAMPLES_PER_FRAME; | |
| 612 s->awin[i][1] = s->awin[i + 1][1] - LPC10_SAMPLES_PER_FRAME; | |
| 613 s->obound[i] = s->obound[i + 1]; | |
| 614 s->voibuf[i + 1][0] = s->voibuf[i + 2][0]; | |
| 615 s->voibuf[i + 1][1] = s->voibuf[i + 2][1]; | |
| 616 s->rmsbuf[i] = s->rmsbuf[i + 1]; | |
| 617 for (j = 0; j < LPC10_ORDER; j++) | |
| 618 s->rcbuf[i][j] = s->rcbuf[i + 1][j]; | |
| 619 } | |
| 620 /* If the average value in the frame was over 1/4096 (after current | |
| 621 BIAS correction), then subtract that much more from samples in the | |
| 622 next frame. If the average value in the frame was under | |
| 623 -1/4096, add 1/4096 more to samples in next frame. In all other | |
| 624 cases, keep BIAS the same. */ | |
| 625 temp = 0.0f; | |
| 626 for (i = 0; i < LPC10_SAMPLES_PER_FRAME; i++) | |
| 627 { | |
| 628 s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i] = speech[i]*4096.0f - s->bias; | |
| 629 temp += s->inbuf[720 - 2*LPC10_SAMPLES_PER_FRAME + i]; | |
| 630 } | |
| 631 if (temp > (float) LPC10_SAMPLES_PER_FRAME) | |
| 632 s->bias++; | |
| 633 else if (temp < (float) (-LPC10_SAMPLES_PER_FRAME)) | |
| 634 s->bias--; | |
| 635 /* Place voicing window */ | |
| 636 i = 721 - LPC10_SAMPLES_PER_FRAME; | |
| 637 s->zpre = preemp(&s->inbuf[i - 181], &s->pebuf[i - 181], LPC10_SAMPLES_PER_FRAME, precoef, s->zpre); | |
| 638 onset(s, s->pebuf, s->osbuf, &s->osptr, 10, 181, 720, LPC10_SAMPLES_PER_FRAME); | |
| 639 | |
| 640 lpc10_placev(s->osbuf, &s->osptr, 10, &s->obound[2], s->vwin, 3, LPC10_SAMPLES_PER_FRAME, 90, 156, 307, 462); | |
| 641 /* The Pitch Extraction algorithm estimates the pitch for a frame | |
| 642 of speech by locating the minimum of the average magnitude difference | |
| 643 function (AMDF). The AMDF operates on low-pass, inverse filtered | |
| 644 speech. (The low-pass filter is an 800 Hz, 19 tap, equiripple, FIR | |
| 645 filter and the inverse filter is a 2nd-order LPC filter.) The pitch | |
| 646 estimate is later refined by dynamic tracking. However, since some | |
| 647 of the tracking parameters are a function of the voicing decisions, | |
| 648 a voicing decision must precede the final pitch estimation. */ | |
| 649 /* See subroutines LPFILT, IVFILT, and eval_highres_amdf. */ | |
| 650 /* LPFILT reads indices LBUFH-LFRAME-29 = 511 through LBUFH = 720 | |
| 651 of INBUF, and writes indices LBUFH+1-LFRAME = 541 through LBUFH | |
| 652 = 720 of LPBUF. */ | |
| 653 lpfilt(&s->inbuf[228], &s->lpbuf[384], 312, LPC10_SAMPLES_PER_FRAME); | |
| 654 /* IVFILT reads indices (PWINH-LFRAME-7) = 353 through PWINH = 540 | |
| 655 of LPBUF, and writes indices (PWINH-LFRAME+1) = 361 through | |
| 656 PWINH = 540 of IVBUF. */ | |
| 657 ivfilt(&s->lpbuf[204], s->ivbuf, 312, LPC10_SAMPLES_PER_FRAME, ivrc); | |
| 658 /* eval_highres_amdf reads indices PWINL = 229 through | |
| 659 (PWINL-1)+MAXWIN+(TAU(LTAU)-TAU(1))/2 = 452 of IVBUF, and writes | |
| 660 indices 1 through LTAU = 60 of AMDF. */ | |
| 661 eval_highres_amdf(s->ivbuf, 156, tau, 60, amdf, &minptr, &maxptr, &mintau); | |
| 662 /* Voicing decisions are made for each half frame of input speech. | |
| 663 An initial voicing classification is made for each half of the | |
| 664 analysis frame, and the voicing decisions for the present frame | |
| 665 are finalized. See subroutine VOICIN. */ | |
| 666 /* The voicing detector (VOICIN) classifies the input signal as | |
| 667 unvoiced (including silence) or voiced using the AMDF windowed | |
| 668 maximum-to-minimum ratio, the zero crossing rate, energy measures, | |
| 669 reflection coefficients, and prediction gains. */ | |
| 670 /* The pitch and voicing rules apply smoothing and isolated | |
| 671 corrections to the pitch and voicing estimates and, in the process, | |
| 672 introduce two frames of delay into the corrected pitch estimates and | |
| 673 voicing decisions. */ | |
| 674 for (half = 0; half < 2; half++) | |
| 675 { | |
| 676 lpc10_voicing(s, | |
| 677 &s->vwin[2][0], | |
| 678 s->inbuf, | |
| 679 s->lpbuf, | |
| 680 buflim, | |
| 681 half, | |
| 682 &amdf[minptr], | |
| 683 &amdf[maxptr], | |
| 684 &mintau, | |
| 685 ivrc, | |
| 686 s->obound); | |
| 687 } | |
| 688 /* Find the minimum cost pitch decision over several frames, | |
| 689 given the current voicing decision and the AMDF array */ | |
| 690 minptr++; | |
| 691 dynamic_pitch_tracking(s, amdf, 60, &minptr, s->voibuf[3][1], pitch, &midx); | |
| 692 ipitch = tau[midx - 1]; | |
| 693 /* Place spectrum analysis and energy windows */ | |
| 694 lpc10_placea(&ipitch, s->voibuf, &s->obound[2], 3, s->vwin, s->awin, ewin, LPC10_SAMPLES_PER_FRAME, 156); | |
| 695 /* Remove short term DC bias over the analysis window. */ | |
| 696 lanal = s->awin[2][1] + 1 - s->awin[2][0]; | |
| 697 remove_dc_bias(&s->pebuf[s->awin[2][0] - 181], lanal, abuf); | |
| 698 /* Compute RMS over integer number of pitch periods within the analysis window. */ | |
| 699 /* Note that in a hardware implementation this computation may be | |
| 700 simplified by using diagonal elements of phi computed by mload(). */ | |
| 701 s->rmsbuf[2] = energyf(&abuf[ewin[2][0] - s->awin[2][0]], ewin[2][1] - ewin[2][0] + 1); | |
| 702 /* Matrix load and invert, check RC's for stability */ | |
| 703 mload(LPC10_ORDER, 1, lanal, abuf, phi, psi); | |
| 704 invert(LPC10_ORDER, phi, psi, &s->rcbuf[2][0]); | |
| 705 rcchk(LPC10_ORDER, &s->rcbuf[1][0], &s->rcbuf[2][0]); | |
| 706 /* Set return parameters */ | |
| 707 voice[0] = s->voibuf[1][0]; | |
| 708 voice[1] = s->voibuf[1][1]; | |
| 709 *rms = s->rmsbuf[0]; | |
| 710 for (i = 0; i < LPC10_ORDER; i++) | |
| 711 rc[i] = s->rcbuf[0][i]; | |
| 712 } | |
| 713 /*- End of function --------------------------------------------------------*/ | |
| 714 /*- End of file ------------------------------------------------------------*/ |
