5
|
1 /*
|
|
2 * SpanDSP - a series of DSP components for telephony
|
|
3 *
|
|
4 * time_scale.h - Time scaling for linear speech data
|
|
5 *
|
|
6 * Written by Steve Underwood <steveu@coppice.org>
|
|
7 *
|
|
8 * Copyright (C) 2004 Steve Underwood
|
|
9 *
|
|
10 * All rights reserved.
|
|
11 *
|
|
12 * This program is free software; you can redistribute it and/or modify
|
|
13 * it under the terms of the GNU General Public License version 2, as
|
|
14 * published by the Free Software Foundation.
|
|
15 *
|
|
16 * This program is distributed in the hope that it will be useful,
|
|
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 * GNU General Public License for more details.
|
|
20 *
|
|
21 * You should have received a copy of the GNU General Public License
|
|
22 * along with this program; if not, write to the Free Software
|
|
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
24 *
|
|
25 * $Id: time_scale.h,v 1.7 2006/10/24 13:45:28 steveu Exp $
|
|
26 */
|
|
27
|
|
28 #if !defined(_TIME_SCALE_H_)
|
|
29 #define _TIME_SCALE_H_
|
|
30
|
|
/*! \page time_scale_page Time scaling speech
\section time_scale_page_sec_1 What does it do?
The time scaling module allows speech files to be played back at a
different speed from the speed at which they were recorded. If this
were done by simply speeding up or slowing down replay, the pitch of
the voice would change, and sound very odd. This module keeps the pitch
of the voice normal.

\section time_scale_page_sec_2 How does it work?
The time scaling module is based on the Pointer Interval Controlled
OverLap and Add (PICOLA) method, developed by Morita Naotaka.
Mikio Ikeda has an excellent web page on this subject at
http://keizai.yokkaichi-u.ac.jp/~ikeda/research/picola.html
There is also working code there. This implementation uses
exactly the same algorithms, but the code is a complete rewrite.
Mikio's code batch processes files. This version works incrementally
on streams, and allows multiple streams to be processed concurrently.
*/
|
|
49
|
|
/* Pitch limits used by the time scaler.
   NOTE(review): presumably expressed in Hz - confirm against the
   implementation. */
#define TIME_SCALE_MIN_PITCH    60
#define TIME_SCALE_MAX_PITCH    250
/* Number of int16_t elements in the context's sample buffer: twice
   SAMPLE_RATE divided by the minimum pitch (integer division).
   SAMPLE_RATE is not defined in this header; it must be defined before
   this macro is expanded.
   NOTE(review): this looks like two periods of the lowest pitch - confirm. */
#define TIME_SCALE_BUF_LEN      (2*SAMPLE_RATE/TIME_SCALE_MIN_PITCH)
|
|
53
|
|
54 typedef struct
|
|
55 {
|
|
56 double rate;
|
|
57 double rcomp;
|
|
58 double rate_nudge;
|
|
59 int fill;
|
|
60 int lcp;
|
|
61 int16_t buf[TIME_SCALE_BUF_LEN];
|
|
62 } time_scale_t;
|
|
63
|
|
64 #ifdef __cplusplus
|
|
65 extern "C" {
|
|
66 #endif
|
|
67
|
|
68 /*! Initialise a time scale context. This must be called before the first
|
|
69 use of the context, to initialise its contents.
|
|
70 \brief Initialise a time scale context.
|
|
71 \param s The time scale context.
|
|
72 \param rate The ratio between the output speed and the input speed.
|
|
73 \return 0 if initialised OK, else -1. */
|
|
74 int time_scale_init(time_scale_t *s, float rate);
|
|
75
|
|
76 /*! Change the time scale rate.
|
|
77 \brief Change the time scale rate.
|
|
78 \param s The time scale context.
|
|
79 \param rate The ratio between the output speed and the input speed.
|
|
80 \return 0 if changed OK, else -1. */
|
|
81 int time_scale_rate(time_scale_t *s, float rate);
|
|
82
|
|
83 /*! Time scale a chunk of audio samples.
|
|
84 \brief Time scale a chunk of audio samples.
|
|
85 \param s The time sclae context.
|
|
86 \param out The output audio sample buffer.
|
|
87 \param in The input audio sample buffer.
|
|
88 \param len The number of input samples.
|
|
89 \return The number of output samples.
|
|
90 */
|
|
91 int time_scale(time_scale_t *s, int16_t out[], int16_t in[], int len);
|
|
92
|
|
93 #ifdef __cplusplus
|
|
94 }
|
|
95 #endif
|
|
96
|
|
97 #endif
|
|
98 /*- End of file ------------------------------------------------------------*/
|