5
|
1 /*
|
|
2 * SpanDSP - a series of DSP components for telephony
|
|
3 *
|
|
4 * echo.c - A line echo canceller. This code is being developed
|
|
5 * against and partially complies with G168.
|
|
6 *
|
|
7 * Written by Steve Underwood <steveu@coppice.org>
|
|
8 * and David Rowe <david_at_rowetel_dot_com>
|
|
9 *
|
|
10 * Copyright (C) 2001, 2003 Steve Underwood, 2007 David Rowe
|
|
11 *
|
|
12 * Based on a bit from here, a bit from there, eye of toad, ear of
|
|
13 * bat, 15 years of failed attempts by David and a few fried brain
|
|
14 * cells.
|
|
15 *
|
|
16 * All rights reserved.
|
|
17 *
|
|
18 * This program is free software; you can redistribute it and/or modify
|
|
19 * it under the terms of the GNU General Public License version 2, as
|
|
20 * published by the Free Software Foundation.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
30 *
|
|
31 * $Id: echo.c,v 1.20 2006/12/01 18:00:48 steveu Exp $
|
|
32 */
|
|
33
|
|
34 /*! \file */
|
|
35
|
|
36 /* Implementation Notes
|
|
37 David Rowe
|
|
38 April 2007
|
|
39
|
|
40 This code started life as Steve's NLMS algorithm with a tap
|
|
41 rotation algorithm to handle divergence during double talk. I
|
|
42 added a Geigel Double Talk Detector (DTD) [2] and performed some
|
|
43 G168 tests. However I had trouble meeting the G168 requirements,
|
|
44 especially for double talk - there were always cases where my DTD
|
|
45 failed, for example where near end speech was under the 6dB
|
|
46 threshold required for declaring double talk.
|
|
47
|
|
48 So I tried a two path algorithm [1], which has so far given better
|
|
49 results. The original tap rotation/Geigel algorithm is available
|
|
50 in SVN http://svn.rowetel.com/software/oslec/tags/before_16bit.
|
|
51 It's probably possible to make it work if some one wants to put some
|
|
52 serious work into it.
|
|
53
|
|
54 At present no special treatment is provided for tones, which
|
|
55 generally cause NLMS algorithms to diverge. Initial runs of a
|
|
56 subset of the G168 tests for tones (e.g ./echo_test 6) show the
|
|
57 current algorithm is passing OK, which is kind of surprising. The
|
|
58 full set of tests needs to be performed to confirm this result.
|
|
59
|
|
60 One other interesting change is that I have managed to get the NLMS
|
|
61 code to work with 16 bit coefficients, rather than the original 32
|
|
62 bit coefficients. This reduces the MIPs and storage required.
|
|
63 I evaluated the 16 bit port using g168_tests.sh and listening tests
|
|
64 on 4 real-world samples.
|
|
65
|
|
66 I also attempted the implementation of a block based NLMS update
|
|
67 [2] but although this passes g168_tests.sh it didn't converge well
|
|
68 on the real-world samples. I have no idea why, perhaps a scaling
|
|
69 problem. The block based code is also available in SVN
|
|
70 http://svn.rowetel.com/software/oslec/tags/before_16bit. If this
|
|
71 code can be debugged, it will lead to further reduction in MIPS, as
|
|
72 the block update code maps nicely onto DSP instruction sets (it's a
|
|
73 dot product) compared to the current sample-by-sample update.
|
|
74
|
|
75 Steve also has some nice notes on echo cancellers in echo.h
|
|
76
|
|
77
|
|
78 References:
|
|
79
|
|
80 [1] Ochiai, Areseki, and Ogihara, "Echo Canceller with Two Echo
|
|
81 Path Models", IEEE Transactions on communications, COM-25,
|
|
82 No. 6, June
|
|
83 1977.
|
|
84 http://www.rowetel.com/images/echo/dual_path_paper.pdf
|
|
85
|
|
86 [2] The classic, very useful paper that tells you how to
|
|
87 actually build a real world echo canceller:
|
|
88 Messerschmitt, Hedberg, Cole, Haoui, Winship, "Digital Voice
|
|
89 Echo Canceller with a TMS32020",
|
|
90 http://www.rowetel.com/images/echo/spra129.pdf
|
|
91
|
|
92 [3] I have written a series of blog posts on this work, here is
|
|
93 Part 1: http://www.rowetel.com/blog/?p=18
|
|
94
|
|
95 [4] The source code http://svn.rowetel.com/software/oslec/
|
|
96
|
|
97 [5] A nice reference on LMS filters:
|
|
98 http://en.wikipedia.org/wiki/Least_mean_squares_filter
|
|
99
|
|
100 Credits:
|
|
101
|
|
102 Thanks to Steve Underwood, Jean-Marc Valin, and Ramakrishnan
|
|
103 Muthukrishnan for their suggestions and email discussions. Thanks
|
|
104 also to those people who collected echo samples for me such as
|
|
105 Mark, Pawel, and Pavel.
|
|
106 */
|
|
107
|
|
108 #ifdef HAVE_CONFIG_H
|
|
109 #include <config.h>
|
|
110 #endif
|
|
111 #ifdef __KERNEL__
|
|
112 #include <linux/kernel.h> /* We're doing kernel work */
|
|
113 #include <linux/module.h>
|
|
114 #include <linux/kernel.h>
|
|
115 #include <linux/slab.h>
|
|
116 #define malloc(a) kmalloc((a), GFP_KERNEL)
|
|
117 #define free(a) kfree(a)
|
|
118 #else
|
|
119 #include <stdio.h>
|
|
120 #include <stdlib.h>
|
|
121 #include <string.h>
|
|
122 #include <inttypes.h>
|
|
123
|
|
124 #endif
|
|
125
|
|
126 #include "spandsp/bit_operations.h"
|
|
127 #include "spandsp/echo.h"
|
|
128
|
|
/* Fallback definitions, used only when no earlier header supplied them. */
#if !defined(NULL)
/* Fully parenthesised so the expansion always behaves as a single operand. */
#define NULL ((void *) 0)
#endif
#if !defined(FALSE)
#define FALSE 0
#endif
#if !defined(TRUE)
#define TRUE (!FALSE)
#endif
|
|
138
|
|
/* Tuning constants used by the canceller below. */

/* Floor added to the filter-state power before its log2 is estimated. */
#define MIN_TX_POWER_FOR_ADAPTION   64
/* Short-term rx level that must be exceeded before double talk is declared. */
#define MIN_RX_POWER_FOR_ADAPTION   64
/* Double-talk hangover: 600 samples, or 75ms */
#define DTD_HANGOVER                600
/* log2() of the DC-blocking filter Beta */
#define DC_LOG2BETA                 3
|
|
143
|
|
144 /*-----------------------------------------------------------------------*\
|
|
145
|
|
146 FUNCTIONS
|
|
147
|
|
148 \*-----------------------------------------------------------------------*/
|
|
149
|
|
150 /* adapting coeffs using the traditional stochastic descent (N)LMS algorithm */
|
|
151
|
|
152
|
|
153 #ifdef __BLACKFIN_ASM__
|
|
154 static void __inline__ lms_adapt_bg(echo_can_state_t *ec, int clean, int shift)
|
|
155 {
|
|
156 int i, j;
|
|
157 int offset1;
|
|
158 int offset2;
|
|
159 int factor;
|
|
160 int exp;
|
|
161 int16_t *phist;
|
|
162 int n;
|
|
163
|
|
164 if (shift > 0)
|
|
165 factor = clean << shift;
|
|
166 else
|
|
167 factor = clean >> -shift;
|
|
168
|
|
169 /* Update the FIR taps */
|
|
170
|
|
171 offset2 = ec->curr_pos;
|
|
172 offset1 = ec->taps - offset2;
|
|
173 phist = &ec->fir_state_bg.history[offset2];
|
|
174
|
|
175 /* st: and en: help us locate the assembler in echo.s */
|
|
176
|
|
177 //asm("st:");
|
|
178 n = ec->taps;
|
|
179 for (i = 0, j = offset2; i < n; i++, j++)
|
|
180 {
|
|
181 exp = *phist++ * factor;
|
|
182 ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
|
183 }
|
|
184 //asm("en:");
|
|
185
|
|
186 /* Note the asm for the inner loop above generated by Blackfin gcc
|
|
187 4.1.1 is pretty good (note even parallel instructions used):
|
|
188
|
|
189 R0 = W [P0++] (X);
|
|
190 R0 *= R2;
|
|
191 R0 = R0 + R3 (NS) ||
|
|
192 R1 = W [P1] (X) ||
|
|
193 nop;
|
|
194 R0 >>>= 15;
|
|
195 R0 = R0 + R1;
|
|
196 W [P1++] = R0;
|
|
197
|
|
198 A block based update algorithm would be much faster but the
|
|
199 above can't be improved on much. Every instruction saved in
|
|
200 the loop above is 2 MIPs/ch! The for loop above is where the
|
|
201 Blackfin spends most of it's time - about 17 MIPs/ch measured
|
|
202 with speedtest.c with 256 taps (32ms). Write-back and
|
|
203 Write-through cache gave about the same performance.
|
|
204 */
|
|
205 }
|
|
206
|
|
207 /*
|
|
208 IDEAS for further optimisation of lms_adapt_bg():
|
|
209
|
|
210 1/ The rounding is quite costly. Could we keep as 32 bit coeffs
|
|
211 then make filter pluck the MS 16-bits of the coeffs when filtering?
|
|
212 However this would lower potential optimisation of filter, as I
|
|
213 think the dual-MAC architecture requires packed 16 bit coeffs.
|
|
214
|
|
215 2/ Block based update would be more efficient, as per comments above,
|
|
216 could use dual MAC architecture.
|
|
217
|
|
218 3/ Look for same sample Blackfin LMS code, see if we can get dual-MAC
|
|
219 packing.
|
|
220
|
|
221 4/ Execute the whole e/c in a block of say 20ms rather than sample
|
|
222 by sample. Processing a few samples every ms is inefficient.
|
|
223 */
|
|
224
|
|
225 #else
|
|
226 static __inline__ void lms_adapt_bg(echo_can_state_t *ec, int clean, int shift)
|
|
227 {
|
|
228 int i;
|
|
229
|
|
230 int offset1;
|
|
231 int offset2;
|
|
232 int factor;
|
|
233 int exp;
|
|
234
|
|
235 if (shift > 0)
|
|
236 factor = clean << shift;
|
|
237 else
|
|
238 factor = clean >> -shift;
|
|
239
|
|
240 /* Update the FIR taps */
|
|
241
|
|
242 offset2 = ec->curr_pos;
|
|
243 offset1 = ec->taps - offset2;
|
|
244
|
|
245 for (i = ec->taps - 1; i >= offset1; i--)
|
|
246 {
|
|
247 exp = (ec->fir_state_bg.history[i - offset1]*factor);
|
|
248 ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
|
249 }
|
|
250 for ( ; i >= 0; i--)
|
|
251 {
|
|
252 exp = (ec->fir_state_bg.history[i + offset2]*factor);
|
|
253 ec->fir_taps16[1][i] += (int16_t) ((exp+(1<<14)) >> 15);
|
|
254 }
|
|
255 }
|
|
256 #endif
|
|
257
|
|
258 /*- End of function --------------------------------------------------------*/
|
|
259
|
|
260 echo_can_state_t *echo_can_create(int len, int adaption_mode)
|
|
261 {
|
|
262 echo_can_state_t *ec;
|
|
263 int i;
|
|
264 int j;
|
|
265
|
|
266 ec = (echo_can_state_t *) malloc(sizeof(*ec));
|
|
267 if (ec == NULL)
|
|
268 return NULL;
|
|
269 memset(ec, 0, sizeof(*ec));
|
|
270
|
|
271 ec->taps = len;
|
|
272 ec->log2taps = top_bit(len);
|
|
273 ec->curr_pos = ec->taps - 1;
|
|
274
|
|
275 for (i = 0; i < 2; i++)
|
|
276 {
|
|
277 if ((ec->fir_taps16[i] = (int16_t *) malloc((ec->taps)*sizeof(int16_t))) == NULL)
|
|
278 {
|
|
279 for (j = 0; j < i; j++)
|
|
280 free(ec->fir_taps16[j]);
|
|
281 free(ec);
|
|
282 return NULL;
|
|
283 }
|
|
284 memset(ec->fir_taps16[i], 0, (ec->taps)*sizeof(int16_t));
|
|
285 }
|
|
286
|
|
287 fir16_create(&ec->fir_state,
|
|
288 ec->fir_taps16[0],
|
|
289 ec->taps);
|
|
290 fir16_create(&ec->fir_state_bg,
|
|
291 ec->fir_taps16[1],
|
|
292 ec->taps);
|
|
293
|
|
294 for(i=0; i<5; i++) {
|
|
295 ec->xvtx[i] = ec->yvtx[i] = ec->xvrx[i] = ec->yvrx[i] = 0;
|
|
296 }
|
|
297
|
|
298 ec->cng_level = 1000;
|
|
299 echo_can_adaption_mode(ec, adaption_mode);
|
|
300
|
|
301 ec->snapshot = (int16_t*)malloc(ec->taps*sizeof(int16_t));
|
|
302 memset(ec->snapshot, 0, sizeof(int16_t)*ec->taps);
|
|
303
|
|
304 ec->cond_met = 0;
|
|
305 ec->Pstates = 0;
|
|
306 ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
|
307 ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
|
308 ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
|
309 ec->Lbgn = ec->Lbgn_acc = 0;
|
|
310 ec->Lbgn_upper = 200;
|
|
311 ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
|
312
|
|
313 return ec;
|
|
314 }
|
|
315 /*- End of function --------------------------------------------------------*/
|
|
316
|
|
317 void echo_can_free(echo_can_state_t *ec)
|
|
318 {
|
|
319 int i;
|
|
320
|
|
321 fir16_free(&ec->fir_state);
|
|
322 fir16_free(&ec->fir_state_bg);
|
|
323 for (i = 0; i < 2; i++)
|
|
324 free(ec->fir_taps16[i]);
|
|
325 free(ec->snapshot);
|
|
326 free(ec);
|
|
327 }
|
|
328 /*- End of function --------------------------------------------------------*/
|
|
329
|
|
330 void echo_can_adaption_mode(echo_can_state_t *ec, int adaption_mode)
|
|
331 {
|
|
332 ec->adaption_mode = adaption_mode;
|
|
333 }
|
|
334 /*- End of function --------------------------------------------------------*/
|
|
335
|
|
336 void echo_can_flush(echo_can_state_t *ec)
|
|
337 {
|
|
338 int i;
|
|
339
|
|
340 ec->Ltxacc = ec->Lrxacc = ec->Lcleanacc = ec->Lclean_bgacc = 0;
|
|
341 ec->Ltx = ec->Lrx = ec->Lclean = ec->Lclean_bg = 0;
|
|
342 ec->tx_1 = ec->tx_2 = ec->rx_1 = ec->rx_2 = 0;
|
|
343
|
|
344 ec->Lbgn = ec->Lbgn_acc = 0;
|
|
345 ec->Lbgn_upper = 200;
|
|
346 ec->Lbgn_upper_acc = ec->Lbgn_upper << 13;
|
|
347
|
|
348 ec->nonupdate_dwell = 0;
|
|
349
|
|
350 fir16_flush(&ec->fir_state);
|
|
351 fir16_flush(&ec->fir_state_bg);
|
|
352 ec->fir_state.curr_pos = ec->taps - 1;
|
|
353 ec->fir_state_bg.curr_pos = ec->taps - 1;
|
|
354 for (i = 0; i < 2; i++)
|
|
355 memset(ec->fir_taps16[i], 0, ec->taps*sizeof(int16_t));
|
|
356
|
|
357 ec->curr_pos = ec->taps - 1;
|
|
358 ec->Pstates = 0;
|
|
359 }
|
|
360 /*- End of function --------------------------------------------------------*/
|
|
361
|
|
362 void echo_can_snapshot(echo_can_state_t *ec) {
|
|
363 memcpy(ec->snapshot, ec->fir_taps16[0], ec->taps*sizeof(int16_t));
|
|
364 }
|
|
365 /*- End of function --------------------------------------------------------*/
|
|
366
|
|
367 /* Dual Path Echo Canceller ------------------------------------------------*/
|
|
368
|
|
369 int16_t echo_can_update(echo_can_state_t *ec, int16_t tx, int16_t rx)
|
|
370 {
|
|
371 int32_t echo_value;
|
|
372 int clean_bg;
|
|
373 int tmp, tmp1;
|
|
374
|
|
375 /* Input scaling was found be required to prevent problems when tx
|
|
376 starts clipping. Another possible way to handle this would be the
|
|
377 filter coefficent scaling. */
|
|
378
|
|
379 ec->tx = tx; ec->rx = rx;
|
|
380 tx >>=1;
|
|
381 rx >>=1;
|
|
382
|
|
383 /*
|
|
384 Filter DC, 3dB point is 160Hz (I think), note 32 bit precision required
|
|
385 otherwise values do not track down to 0. Zero at DC, Pole at (1-Beta)
|
|
386 only real axis. Some chip sets (like Si labs) don't need
|
|
387 this, but something like a $10 X100P card does. Any DC really slows
|
|
388 down convergence.
|
|
389
|
|
390 Note: removes some low frequency from the signal, this reduces
|
|
391 the speech quality when listening to samples through headphones
|
|
392 but may not be obvious through a telephone handset.
|
|
393
|
|
394 Note that the 3dB frequency in radians is approx Beta, e.g. for
|
|
395 Beta = 2^(-3) = 0.125, 3dB freq is 0.125 rads = 159Hz.
|
|
396 */
|
|
397
|
|
398 if (ec->adaption_mode & ECHO_CAN_USE_RX_HPF) {
|
|
399 tmp = rx << 15;
|
|
400 #if 1
|
|
401 /* Make sure the gain of the HPF is 1.0. This can still saturate a little under
|
|
402 impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
|
403 level signals. However, the scale of such clipping is small, and the error due to
|
|
404 any saturation should not markedly affect the downstream processing. */
|
|
405 tmp -= (tmp >> 4);
|
|
406 #endif
|
|
407 ec->rx_1 += -(ec->rx_1>>DC_LOG2BETA) + tmp - ec->rx_2;
|
|
408
|
|
409 /* hard limit filter to prevent clipping. Note that at this stage
|
|
410 rx should be limited to +/- 16383 due to right shift above */
|
|
411 tmp1 = ec->rx_1 >> 15;
|
|
412 if (tmp1 > 16383) tmp1 = 16383;
|
|
413 if (tmp1 < -16383) tmp1 = -16383;
|
|
414 rx = tmp1;
|
|
415 ec->rx_2 = tmp;
|
|
416 }
|
|
417
|
|
418 /* Block average of power in the filter states. Used for
|
|
419 adaption power calculation. */
|
|
420
|
|
421 {
|
|
422 int new, old;
|
|
423
|
|
424 /* efficient "out with the old and in with the new" algorithm so
|
|
425 we don't have to recalculate over the whole block of
|
|
426 samples. */
|
|
427 new = (int)tx * (int)tx;
|
|
428 old = (int)ec->fir_state.history[ec->fir_state.curr_pos] *
|
|
429 (int)ec->fir_state.history[ec->fir_state.curr_pos];
|
|
430 ec->Pstates += ((new - old) + (1<<(ec->log2taps-1))) >> ec->log2taps;
|
|
431 if (ec->Pstates < 0) ec->Pstates = 0;
|
|
432 }
|
|
433
|
|
434 /* Calculate short term average levels using simple single pole IIRs */
|
|
435
|
|
436 ec->Ltxacc += abs(tx) - ec->Ltx;
|
|
437 ec->Ltx = (ec->Ltxacc + (1<<4)) >> 5;
|
|
438 ec->Lrxacc += abs(rx) - ec->Lrx;
|
|
439 ec->Lrx = (ec->Lrxacc + (1<<4)) >> 5;
|
|
440
|
|
441 /* Foreground filter ---------------------------------------------------*/
|
|
442
|
|
443 ec->fir_state.coeffs = ec->fir_taps16[0];
|
|
444 echo_value = fir16(&ec->fir_state, tx);
|
|
445 ec->clean = rx - echo_value;
|
|
446 ec->Lcleanacc += abs(ec->clean) - ec->Lclean;
|
|
447 ec->Lclean = (ec->Lcleanacc + (1<<4)) >> 5;
|
|
448
|
|
449 /* Background filter ---------------------------------------------------*/
|
|
450
|
|
451 echo_value = fir16(&ec->fir_state_bg, tx);
|
|
452 clean_bg = rx - echo_value;
|
|
453 ec->Lclean_bgacc += abs(clean_bg) - ec->Lclean_bg;
|
|
454 ec->Lclean_bg = (ec->Lclean_bgacc + (1<<4)) >> 5;
|
|
455
|
|
456 /* Background Filter adaption -----------------------------------------*/
|
|
457
|
|
458 /* Almost always adap bg filter, just simple DT and energy
|
|
459 detection to minimise adaption in cases of strong double talk.
|
|
460 However this is not critical for the dual path algorithm.
|
|
461 */
|
|
462 ec->factor = 0;
|
|
463 ec->shift = 0;
|
|
464 if ((ec->nonupdate_dwell == 0)) {
|
|
465 int P, logP, shift;
|
|
466
|
|
467 /* Determine:
|
|
468
|
|
469 f = Beta * clean_bg_rx/P ------ (1)
|
|
470
|
|
471 where P is the total power in the filter states.
|
|
472
|
|
473 The Boffins have shown that if we obey (1) we converge
|
|
474 quickly and avoid instability.
|
|
475
|
|
476 The correct factor f must be in Q30, as this is the fixed
|
|
477 point format required by the lms_adapt_bg() function,
|
|
478 therefore the scaled version of (1) is:
|
|
479
|
|
480 (2^30) * f = (2^30) * Beta * clean_bg_rx/P
|
|
481 factor = (2^30) * Beta * clean_bg_rx/P ----- (2)
|
|
482
|
|
483 We have chosen Beta = 0.25 by experiment, so:
|
|
484
|
|
485 factor = (2^30) * (2^-2) * clean_bg_rx/P
|
|
486
|
|
487 (30 - 2 - log2(P))
|
|
488 factor = clean_bg_rx 2 ----- (3)
|
|
489
|
|
490 To avoid a divide we approximate log2(P) as top_bit(P),
|
|
491 which returns the position of the highest non-zero bit in
|
|
492 P. This approximation introduces an error as large as a
|
|
493 factor of 2, but the algorithm seems to handle it OK.
|
|
494
|
|
495 Come to think of it a divide may not be a big deal on a
|
|
496 modern DSP, so its probably worth checking out the cycles
|
|
497 for a divide versus a top_bit() implementation.
|
|
498 */
|
|
499
|
|
500 P = MIN_TX_POWER_FOR_ADAPTION + ec->Pstates;
|
|
501 logP = top_bit(P) + ec->log2taps;
|
|
502 shift = 30 - 2 - logP;
|
|
503 ec->shift = shift;
|
|
504
|
|
505 lms_adapt_bg(ec, clean_bg, shift);
|
|
506 }
|
|
507
|
|
508 /* very simple DTD to make sure we dont try and adapt with strong
|
|
509 near end speech */
|
|
510
|
|
511 ec->adapt = 0;
|
|
512 if ((ec->Lrx > MIN_RX_POWER_FOR_ADAPTION) && (ec->Lrx > ec->Ltx))
|
|
513 ec->nonupdate_dwell = DTD_HANGOVER;
|
|
514 if (ec->nonupdate_dwell)
|
|
515 ec->nonupdate_dwell--;
|
|
516
|
|
517 /* Transfer logic ------------------------------------------------------*/
|
|
518
|
|
519 /* These conditions are from the dual path paper [1], I messed with
|
|
520 them a bit to improve performance. */
|
|
521
|
|
522 if ((ec->adaption_mode & ECHO_CAN_USE_ADAPTION) &&
|
|
523 (ec->nonupdate_dwell == 0) &&
|
|
524 (8*ec->Lclean_bg < 7*ec->Lclean) /* (ec->Lclean_bg < 0.875*ec->Lclean) */ &&
|
|
525 (8*ec->Lclean_bg < ec->Ltx) /* (ec->Lclean_bg < 0.125*ec->Ltx) */ )
|
|
526 {
|
|
527 if (ec->cond_met == 6) {
|
|
528 /* BG filter has had better results for 6 consecutive samples */
|
|
529 ec->adapt = 1;
|
|
530 memcpy(ec->fir_taps16[0], ec->fir_taps16[1], ec->taps*sizeof(int16_t));
|
|
531 }
|
|
532 else
|
|
533 ec->cond_met++;
|
|
534 }
|
|
535 else
|
|
536 ec->cond_met = 0;
|
|
537
|
|
538 /* Non-Linear Processing ---------------------------------------------------*/
|
|
539
|
|
540 ec->clean_nlp = ec->clean;
|
|
541 if (ec->adaption_mode & ECHO_CAN_USE_NLP)
|
|
542 {
|
|
543 /* Non-linear processor - a fancy way to say "zap small signals, to avoid
|
|
544 residual echo due to (uLaw/ALaw) non-linearity in the channel.". */
|
|
545
|
|
546 if ((16*ec->Lclean < ec->Ltx))
|
|
547 {
|
|
548 /* Our e/c has improved echo by at least 24 dB (each factor of 2 is 6dB,
|
|
549 so 2*2*2*2=16 is the same as 6+6+6+6=24dB) */
|
|
550 if (ec->adaption_mode & ECHO_CAN_USE_CNG)
|
|
551 {
|
|
552 ec->cng_level = ec->Lbgn;
|
|
553
|
|
554 /* Very elementary comfort noise generation. Just random
|
|
555 numbers rolled off very vaguely Hoth-like. DR: This
|
|
556 noise doesn't sound quite right to me - I suspect there
|
|
557 are some overlfow issues in the filtering as it's too
|
|
558 "crackly". TODO: debug this, maybe just play noise at
|
|
559 high level or look at spectrum.
|
|
560 */
|
|
561
|
|
562 ec->cng_rndnum = 1664525U*ec->cng_rndnum + 1013904223U;
|
|
563 ec->cng_filter = ((ec->cng_rndnum & 0xFFFF) - 32768 + 5*ec->cng_filter) >> 3;
|
|
564 ec->clean_nlp = (ec->cng_filter*ec->cng_level*8) >> 14;
|
|
565
|
|
566 }
|
|
567 else if (ec->adaption_mode & ECHO_CAN_USE_CLIP)
|
|
568 {
|
|
569 /* This sounds much better than CNG */
|
|
570 if (ec->clean_nlp > ec->Lbgn)
|
|
571 ec->clean_nlp = ec->Lbgn;
|
|
572 if (ec->clean_nlp < -ec->Lbgn)
|
|
573 ec->clean_nlp = -ec->Lbgn;
|
|
574 }
|
|
575 else
|
|
576 {
|
|
577 /* just mute the residual, doesn't sound very good, used mainly
|
|
578 in G168 tests */
|
|
579 ec->clean_nlp = 0;
|
|
580 }
|
|
581 }
|
|
582 else {
|
|
583 /* Background noise estimator. I tried a few algorithms
|
|
584 here without much luck. This very simple one seems to
|
|
585 work best, we just average the level using a slow (1 sec
|
|
586 time const) filter if the current level is less than a
|
|
587 (experimentally derived) constant. This means we dont
|
|
588 include high level signals like near end speech. When
|
|
589 combined with CNG or especially CLIP seems to work OK.
|
|
590 */
|
|
591 if (ec->Lclean < 40) {
|
|
592 ec->Lbgn_acc += abs(ec->clean) - ec->Lbgn;
|
|
593 ec->Lbgn = (ec->Lbgn_acc + (1<<11)) >> 12;
|
|
594 }
|
|
595 }
|
|
596 }
|
|
597
|
|
598 /* Roll around the taps buffer */
|
|
599 if (ec->curr_pos <= 0)
|
|
600 ec->curr_pos = ec->taps;
|
|
601 ec->curr_pos--;
|
|
602
|
|
603 if (ec->adaption_mode & ECHO_CAN_DISABLE)
|
|
604 ec->clean_nlp = rx;
|
|
605
|
|
606 /* Output scaled back up again to match input scaling */
|
|
607
|
|
608 return (int16_t) ec->clean_nlp << 1;
|
|
609 }
|
|
610
|
|
611 /*- End of function --------------------------------------------------------*/
|
|
612
|
|
613 /* This function is separated from the echo canceller as it is usually called
|
|
614 as part of the tx process. See rx HP (DC blocking) filter above, it's
|
|
615 the same design.
|
|
616
|
|
617 Some soft phones send speech signals with a lot of low frequency
|
|
618 energy, e.g. down to 20Hz. This can make the hybrid non-linear
|
|
619 which causes the echo canceller to fall over. This filter can help
|
|
620 by removing any low frequency before it gets to the tx port of the
|
|
621 hybrid.
|
|
622
|
|
623 It can also help by removing any DC in the tx signal. DC is bad
|
|
624 for LMS algorithms.
|
|
625
|
|
626 This is one of the classic DC removal filters, adjusted to provide sufficient
|
|
627 bass rolloff to meet the above requirement to protect hybrids from things that
|
|
628 upset them. The difference between successive samples produces a lousy HPF, and
|
|
629 then a suitably placed pole flattens things out. The final result is a nicely
|
|
630 rolled off bass end. The filtering is implemented with extended fractional
|
|
631 precision, which noise shapes things, giving very clean DC removal.
|
|
632 */
|
|
633
|
|
634 int16_t echo_can_hpf_tx(echo_can_state_t *ec, int16_t tx) {
|
|
635 int tmp, tmp1;
|
|
636
|
|
637 if (ec->adaption_mode & ECHO_CAN_USE_TX_HPF) {
|
|
638 tmp = tx << 15;
|
|
639 #if 1
|
|
640 /* Make sure the gain of the HPF is 1.0. The first can still saturate a little under
|
|
641 impulse conditions, and it might roll to 32768 and need clipping on sustained peak
|
|
642 level signals. However, the scale of such clipping is small, and the error due to
|
|
643 any saturation should not markedly affect the downstream processing. */
|
|
644 tmp -= (tmp >> 4);
|
|
645 #endif
|
|
646 ec->tx_1 += -(ec->tx_1>>DC_LOG2BETA) + tmp - ec->tx_2;
|
|
647 tmp1 = ec->tx_1 >> 15;
|
|
648 if (tmp1 > 32767) tmp1 = 32767;
|
|
649 if (tmp1 < -32767) tmp1 = -32767;
|
|
650 tx = tmp1;
|
|
651 ec->tx_2 = tmp;
|
|
652 }
|
|
653
|
|
654 return tx;
|
|
655 }
|
|
656
|
|
657 /*- End of function --------------------------------------------------------*/
|
|
658 /*- End of file ------------------------------------------------------------*/
|