5
|
1 /*
|
|
2 * SpanDSP - a series of DSP components for telephony
|
|
3 *
|
|
4 * plc.h
|
|
5 *
|
|
6 * Written by Steve Underwood <steveu@coppice.org>
|
|
7 *
|
|
8 * Copyright (C) 2004 Steve Underwood
|
|
9 *
|
|
10 * All rights reserved.
|
|
11 *
|
|
12 * This program is free software; you can redistribute it and/or modify
|
|
13 * it under the terms of the GNU General Public License version 2, as
|
|
14 * published by the Free Software Foundation.
|
|
15 *
|
|
16 * This program is distributed in the hope that it will be useful,
|
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 * GNU General Public License for more details.
|
|
20 *
|
|
21 * You should have received a copy of the GNU General Public License
|
|
22 * along with this program; if not, write to the Free Software
|
|
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
24 *
|
|
25 * $Id: plc.h,v 1.12 2006/10/24 13:45:28 steveu Exp $
|
|
26 */
|
|
27
|
|
28 /*! \file */
|
|
29
|
|
30 #if !defined(_PLC_H_)
|
|
31 #define _PLC_H_
|
|
32
|
|
/*! \page plc_page Packet loss concealment
\section plc_page_sec_1 What does it do?
The packet loss concealment module provides a synthetic fill-in signal, to minimise
the audible effect of lost packets in VoIP applications. It is not tied to any
particular codec, and could be used with almost any codec which does not
specify its own procedure for packet loss concealment.

Where a codec-specific concealment procedure exists, that algorithm is usually built
around knowledge of the characteristics of the particular codec. It will, therefore,
generally give better results for that particular codec than this generic concealer will.

The PLC code implements an algorithm similar to the one described in Appendix I of G.711.
However, the G.711 algorithm is optimised for 10ms packets. Few people use such small
packets. 20ms is a much more common value, and longer packets are also quite common. The
algorithm has been adjusted with this in mind. Also, the G.711 approach causes an
algorithmic delay, and requires significant buffer manipulation when there is no packet
loss. The algorithm used here avoids this. It causes no delay, and achieves comparable
quality with normal speech.

Note that both this algorithm and the one in G.711 are optimised for speech. For most kinds
of music, a much slower decay on bursts of lost packets gives better results.

\section plc_page_sec_2 How does it work?
While good packets are being received, the plc_rx() routine keeps a record of the trailing
section of the known speech signal. If a packet is missed, plc_fillin() is called to produce
a synthetic replacement for the real speech signal. The average magnitude difference function
(AMDF) is applied to the last known good signal, to determine its effective pitch.
Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech
will be repeated over and over until the real speech resumes. However, several refinements
are needed to obtain smooth pleasant sounding results.

- The two ends of the stored cycle of speech will not always fit together smoothly. This can
  cause roughness, or even clicks, at the joins between cycles. To soften this, the
  1/4 pitch period of real speech preceding the cycle to be repeated is blended with the last
  1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e.
  in total, the last 5/4 pitch periods of real speech are used).

- The start of the synthetic speech will not always fit together smoothly with the tail of
  real speech passed on before the erasure was identified. Ideally, we would like to modify
  the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However,
  it is too late for that. We could have delayed the real speech a little, but that would
  require more buffer manipulation, and hurt the efficiency of the no-lost-packets case
  (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify
  the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed,
  and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result
  seems quite acceptable.

- As we progress into the erasure, the chances of the synthetic signal being anything like
  correct steadily fall. Therefore, the volume of the synthesized signal is made to decay
  linearly, such that after 50ms of missing audio it is reduced to silence.

- When real speech resumes, an extra 1/4 pitch period of synthetic speech is blended with the
  start of the real speech. If the erasure is small, this smoothes the transition. If the erasure
  is long, and the synthetic signal has faded to zero, the blending softens the start up of the
  real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset.

\section plc_page_sec_3 How do I use it?
Before audio is processed, call plc_init() to create an instance of the packet loss
concealer. For each received audio packet that is acceptable (i.e. not including those being
dropped for being too late) call plc_rx() to record the content of the packet. Note this may
modify the packet a little after a period of packet loss, to blend real and synthetic data smoothly.
When a real packet is not available in time, call plc_fillin() to create a synthetic substitute.
That's it!
*/

/*! Minimum allowed pitch (66 Hz). The value is a pitch period in samples, so
    the lowest pitch frequency corresponds to the larger of the two limits.
    At the 8000 samples/second rate implied by CORRELATION_SPAN
    (160 samples = 20 ms), 120 samples is roughly 66 Hz. */
#define PLC_PITCH_MIN           120
/*! Maximum allowed pitch (200 Hz). The value is a pitch period in samples;
    40 samples is 200 Hz at 8000 samples/second. */
#define PLC_PITCH_MAX           40
/*! Maximum pitch OLA (overlap-add) window: one quarter of the longest
    pitch period, PLC_PITCH_MIN. */
#define PLC_PITCH_OVERLAP_MAX   (PLC_PITCH_MIN >> 2)
/*! The length over which the AMDF function looks for similarity (20 ms),
    in samples. */
#define CORRELATION_SPAN        160
/*! History buffer length, in samples. The buffer must also be at least 1.25
    times PLC_PITCH_MIN, but that is much smaller than the buffer needs to be
    for the pitch assessment. */
#define PLC_HISTORY_LEN         (CORRELATION_SPAN + PLC_PITCH_MIN)

111 /*!
|
|
112 The generic packet loss concealer context.
|
|
113 */
|
|
114 typedef struct
|
|
115 {
|
|
116 /*! Consecutive erased samples */
|
|
117 int missing_samples;
|
|
118 /*! Current offset into pitch period */
|
|
119 int pitch_offset;
|
|
120 /*! Pitch estimate */
|
|
121 int pitch;
|
|
122 /*! Buffer for a cycle of speech */
|
|
123 float pitchbuf[PLC_PITCH_MIN];
|
|
124 /*! History buffer */
|
|
125 int16_t history[PLC_HISTORY_LEN];
|
|
126 /*! Current pointer into the history buffer */
|
|
127 int buf_ptr;
|
|
128 } plc_state_t;
|
|
129
|
|
130
|
|
131 #ifdef __cplusplus
|
|
132 extern "C" {
|
|
133 #endif
|
|
134
|
|
135 /*! Process a block of received audio samples for PLC.
|
|
136 \brief Process a block of received audio samples for PLC.
|
|
137 \param s The packet loss concealer context.
|
|
138 \param amp The audio sample buffer.
|
|
139 \param len The number of samples in the buffer.
|
|
140 \return The number of samples in the buffer. */
|
|
141 int plc_rx(plc_state_t *s, int16_t amp[], int len);
|
|
142
|
|
143 /*! Fill-in a block of missing audio samples.
|
|
144 \brief Fill-in a block of missing audio samples.
|
|
145 \param s The packet loss concealer context.
|
|
146 \param amp The audio sample buffer.
|
|
147 \param len The number of samples to be synthesised.
|
|
148 \return The number of samples synthesized. */
|
|
149 int plc_fillin(plc_state_t *s, int16_t amp[], int len);
|
|
150
|
|
151 /*! Initialise a packet loss concealer context.
|
|
152 \brief Initialise a PLC context.
|
|
153 \param s The packet loss concealer context.
|
|
154 \return A pointer to the the packet loss concealer context. */
|
|
155 plc_state_t *plc_init(plc_state_t *s);
|
|
156
|
|
157 /*! Free a packet loss concealer context.
|
|
158 \param s The packet loss concealer context.
|
|
159 \return 0 for OK. */
|
|
160 int plc_release(plc_state_t *s);
|
|
161
|
|
162 #ifdef __cplusplus
|
|
163 }
|
|
164 #endif
|
|
165
|
|
166 #endif
|
|
167 /*- End of file ------------------------------------------------------------*/
|