Mercurial > hg > audiostuff
comparison spandsp-0.0.6pre17/src/spandsp/fast_convert.h @ 4:26cd8f1ef0b1
import spandsp-0.0.6pre17
author | Peter Meerwald <pmeerw@cosy.sbg.ac.at> |
---|---|
date | Fri, 25 Jun 2010 15:50:58 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:c6c5a16ce2f2 | 4:26cd8f1ef0b1 |
---|---|
1 /* | |
2 * SpanDSP - a series of DSP components for telephony | |
3 * | |
4 * fast_convert.h - Quick ways to convert floating point numbers to integers | |
5 * | |
6 * Written by Steve Underwood <steveu@coppice.org> | |
7 * | |
8 * Copyright (C) 2009 Steve Underwood | |
9 * | |
10 * All rights reserved. | |
11 * | |
12 * This program is free software; you can redistribute it and/or modify | |
13 * it under the terms of the GNU Lesser General Public License version 2.1, | |
14 * as published by the Free Software Foundation. | |
15 * | |
16 * This program is distributed in the hope that it will be useful, | |
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 * GNU Lesser General Public License for more details. | |
20 * | |
21 * You should have received a copy of the GNU Lesser General Public | |
22 * License along with this program; if not, write to the Free Software | |
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
24 * | |
25 * $Id: fast_convert.h,v 1.9 2009/10/03 04:37:25 steveu Exp $ | |
26 */ | |
27 | |
28 #if !defined(_SPANDSP_FAST_CONVERT_H_) | |
29 #define _SPANDSP_FAST_CONVERT_H_ | |
30 | |
31 #if defined(__cplusplus) | |
32 extern "C" | |
33 { | |
34 #endif | |
35 | |
36 /* The following code, to handle issues with lrint() and lrintf() on various | |
37 * platforms, is adapted from similar code in libsndfile, which is: | |
38 * | |
39 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com> | |
40 * | |
41 * This program is free software; you can redistribute it and/or modify | |
42 * it under the terms of the GNU Lesser General Public License as published by | |
43 * the Free Software Foundation; either version 2.1 of the License, or | |
44 * (at your option) any later version. | |
45 * | |
46 * This program is distributed in the hope that it will be useful, | |
47 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
49 * GNU Lesser General Public License for more details. | |
50 */ | |
51 | |
52 /* | |
53 * On Intel Pentium processors (especially PIII and probably P4), converting | |
54 * from float to int is very slow. To meet the C specs, the code produced by | |
55 * most C compilers targeting Pentium needs to change the FPU rounding mode | |
56 * before the float to int conversion is performed. | |
57 * | |
58 * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It | |
59 * is this flushing of the pipeline which is so slow. | |
60 * | |
61 * Fortunately the ISO C99 specification defines the functions lrint, lrintf, | |
62 * llrint and llrintf which fix this problem as a side effect. | |
63 * | |
64 * On Unix-like systems, the configure process should have detected the | |
65 * presence of these functions. If they weren't found we have to replace them | |
66 * here with a standard C cast. | |
67 */ | |
68 | |
69 /* | |
70 * The C99 prototypes for these functions are as follows: | |
71 * | |
72 * float rintf(float x); | |
73 * double rint(double x); | |
74 * long int lrintf(float x); | |
75 * long int lrint(double x); | |
76 * long long int llrintf(float x); | |
77 * long long int llrint(double x); | |
78 * | |
79 * The presence of the required functions is detected during the configure | |
80 * process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in | |
81 * the config file. | |
82 */ | |
83 | |
84 #if defined(__CYGWIN__) | |
85 #if !defined(__cplusplus) | |
86 /* | |
87 * CYGWIN has lrint and lrintf functions, but they are slow and buggy: | |
88 * http://sourceware.org/ml/cygwin/2005-06/msg00153.html | |
89 * http://sourceware.org/ml/cygwin/2005-09/msg00047.html | |
90 * The latest version of cygwin seems to have made no effort to fix this. | |
91 * These replacement functions (pulled from the Public Domain MinGW | |
92 * math.h header) replace the native versions. | |
93 */ | |
/*
 * Replacement lrint(): convert x to an integer with the x87 "fistpl"
 * instruction.
 *
 * "fistpl" stores exactly 32 bits, so the result is captured in an int and
 * widened on return. Storing straight into a long int would leave the upper
 * half of the result uninitialised wherever long int is wider than 32 bits.
 */
static __inline__ long int lrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by the instruction */
    );

    return rounded;
}
108 | |
/*
 * Replacement lrintf(): convert x to an integer with the x87 "fistpl"
 * instruction.
 *
 * "fistpl" stores exactly 32 bits, so the result is captured in an int and
 * widened on return. Storing straight into a long int would leave the upper
 * half of the result uninitialised wherever long int is wider than 32 bits.
 */
static __inline__ long int lrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by the instruction */
    );

    return rounded;
}
122 #endif | |
123 | |
124 /* The fastest way to convert is the equivalent of lrint() */ | |
/*
 * Fast double to integer conversion using the x87 "fistpl" instruction.
 *
 * "fistpl" stores exactly 32 bits, so the result is captured in an int and
 * widened on return. Storing straight into a long int would leave the upper
 * half of the result uninitialised wherever long int is wider than 32 bits.
 */
static __inline__ long int lfastrint(double x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by the instruction */
    );

    return rounded;
}
139 | |
/*
 * Fast float to integer conversion using the x87 "fistpl" instruction.
 *
 * "fistpl" stores exactly 32 bits, so the result is captured in an int and
 * widened on return. Storing straight into a long int would leave the upper
 * half of the result uninitialised wherever long int is wider than 32 bits.
 */
static __inline__ long int lfastrintf(float x)
{
    int rounded;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (rounded)
        : "t" (x)       /* x is supplied on the top of the x87 stack... */
        : "st"          /* ...and is popped by the instruction */
    );

    return rounded;
}
153 #elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590) | |
154 | |
155 #if defined(__i386__) | |
156 /* These routines are guaranteed fast on an i386 machine. Using the built in | |
157 lrint() and lrintf() should be similar, but they may not always be enabled. | |
158 Sometimes, especially with "-O0", you might get slow calls to routines. */ | |
/* Convert a double to an integer with a single x87 store-and-pop ("fistpl"). */
static __inline__ long int lfastrint(double x)
{
    long int converted;

    /* Input on the top of the x87 stack, popped by the instruction; the
       result is written directly to memory. */
    __asm__ __volatile__("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
173 | |
/* Convert a float to an integer with a single x87 store-and-pop ("fistpl"). */
static __inline__ long int lfastrintf(float x)
{
    long int converted;

    /* Input on the top of the x87 stack, popped by the instruction; the
       result is written directly to memory. */
    __asm__ __volatile__("fistpl %0" : "=m" (converted) : "t" (x) : "st");
    return converted;
}
187 #elif defined(__x86_64__) | |
188 /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a | |
189 double or float to an int. It looks like the design on the x86_64 took account | |
190 of the default behaviour specified for C. */ | |
/* Convert a double to a long int with a plain C cast (truncates toward zero). */
static __inline__ long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
195 | |
/* Convert a float to a long int with a plain C cast (truncates toward zero). */
static __inline__ long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
200 #elif defined(__ppc__) || defined(__powerpc__) | |
/* Convert a double to an integer using the PowerPC "fctiw" instruction. */
static __inline__ long int lfastrint(register double x)
{
    int words[2];

    /* fctiw converts in place in the floating point register; stfd then
       stores the 8 byte register image over both ints.
       NOTE(review): %1 is declared as an input-only operand, yet fctiw
       writes to it - confirm against the GCC extended asm rules. */
    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (words)
        : "f" (x)
        : "memory"
    );

    /* The converted value is taken from the second word. */
    return words[1];
}
216 | |
/* Convert a float to an integer using the PowerPC "fctiw" instruction. */
static __inline__ long int lfastrintf(register float x)
{
    int words[2];

    /* fctiw converts in place in the floating point register; stfd then
       stores the 8 byte register image over both ints.
       NOTE(review): %1 is declared as an input-only operand, yet fctiw
       writes to it - confirm against the GCC extended asm rules. */
    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (words)
        : "f" (x)
        : "memory"
    );

    /* The converted value is taken from the second word. */
    return words[1];
}
232 #else | |
233 /* Fallback routines, for unrecognised platforms */ | |
/* Fallback conversion: a plain C cast from double (truncates toward zero). */
static __inline__ long int lfastrint(double x)
{
    long int truncated = (long int) x;

    return truncated;
}
238 | |
/* Fallback conversion: a plain C cast from float (truncates toward zero). */
static __inline__ long int lfastrintf(float x)
{
    long int truncated = (long int) x;

    return truncated;
}
243 #endif | |
244 | |
245 #elif defined(_M_IX86) | |
246 /* Visual Studio i386 */ | |
247 /* | |
248 * Win32 doesn't seem to have the lrint() and lrintf() functions. | |
249 * Therefore implement inline versions of these functions here. | |
250 */ | |
251 | |
252 __inline long int lrint(double x) | |
253 { | |
254 long int i; | |
255 | |
256 _asm | |
257 { | |
258 fld x | |
259 fistp i | |
260 }; | |
261 return i; | |
262 } | |
263 | |
264 __inline long int lrintf(float x) | |
265 { | |
266 long int i; | |
267 | |
268 _asm | |
269 { | |
270 fld x | |
271 fistp i | |
272 }; | |
273 return i; | |
274 } | |
275 | |
276 __inline float rintf(float flt) | |
277 { | |
278 _asm | |
279 { fld flt | |
280 frndint | |
281 } | |
282 } | |
283 | |
284 __inline double rint(double dbl) | |
285 { | |
286 _asm | |
287 { | |
288 fld dbl | |
289 frndint | |
290 } | |
291 } | |
292 | |
293 __inline long int lfastrint(double x) | |
294 { | |
295 long int i; | |
296 | |
297 _asm | |
298 { | |
299 fld x | |
300 fistp i | |
301 }; | |
302 return i; | |
303 } | |
304 | |
305 __inline long int lfastrintf(float x) | |
306 { | |
307 long int i; | |
308 | |
309 _asm | |
310 { | |
311 fld x | |
312 fistp i | |
313 }; | |
314 return i; | |
315 } | |
316 #elif defined(_M_X64) | |
317 /* Visual Studio x86_64 */ | |
318 /* x86_64 machines will do best with a simple assignment. */ | |
319 #include <intrin.h> | |
320 | |
/*
 * Inline lrint() for Visual Studio on x86_64, built on the SSE2 scalar
 * double to integer conversion.
 *
 * The value is loaded with _mm_load_sd(), which reads only the single double
 * at &x. The packed load _mm_loadu_pd() would read two doubles (16 bytes),
 * running past the end of the 8 byte parameter.
 */
__inline long int lrint(double x)
{
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
325 | |
/* Inline lrintf() for Visual Studio on x86_64, built on the SSE scalar
   float to integer conversion. */
__inline long int lrintf(float x)
{
    __m128 scalar;

    scalar = _mm_load_ss((const float *) &x);
    return _mm_cvt_ss2si(scalar);
}
330 | |
/* Fast double to integer conversion: a plain C cast (truncates toward zero). */
__inline long int lfastrint(double x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
335 | |
/* Fast float to integer conversion: a plain C cast (truncates toward zero). */
__inline long int lfastrintf(float x)
{
    long int truncated;

    truncated = (long int) x;
    return truncated;
}
340 #elif defined(__MWERKS__) && defined(macintosh) | |
341 /* This MacOS 9 solution was provided by Stephane Letz */ | |
342 | |
343 long int __inline__ lfastrint(register double x) | |
344 { | |
345 long int res[2]; | |
346 | |
347 asm | |
348 { | |
349 fctiw x, x | |
350 stfd x, res | |
351 } | |
352 return res[1]; | |
353 } | |
354 | |
355 long int __inline__ lfastrintf(register float x) | |
356 { | |
357 long int res[2]; | |
358 | |
359 asm | |
360 { | |
361 fctiw x, x | |
362 stfd x, res | |
363 } | |
364 return res[1]; | |
365 } | |
366 #elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__)) | |
367 /* For Apple Mac OS/X - do recent versions still need this? */ | |
368 | |
/* Convert a double to an integer using the PowerPC "fctiw" instruction. */
static __inline__ long int lfastrint(register double x)
{
    int words[2];

    /* fctiw converts in place in the floating point register; stfd then
       stores the 8 byte register image over both ints.
       NOTE(review): %1 is declared as an input-only operand, yet fctiw
       writes to it - confirm against the GCC extended asm rules. */
    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (words)
        : "f" (x)
        : "memory"
    );

    /* The converted value is taken from the second word. */
    return words[1];
}
384 | |
/* Convert a float to an integer using the PowerPC "fctiw" instruction. */
static __inline__ long int lfastrintf(register float x)
{
    int words[2];

    /* fctiw converts in place in the floating point register; stfd then
       stores the 8 byte register image over both ints.
       NOTE(review): %1 is declared as an input-only operand, yet fctiw
       writes to it - confirm against the GCC extended asm rules. */
    __asm__ __volatile__
    (
        "fctiw %1, %1\n\t"
        "stfd %1, %0"
        : "=m" (words)
        : "f" (x)
        : "memory"
    );

    /* The converted value is taken from the second word. */
    return words[1];
}
400 #else | |
401 /* There is nothing else to do, but use a simple casting operation, instead of a real | |
402 rint() type function. Since we are only trying to use rint() to speed up conversions, | |
403 the accuracy issues related to changing the rounding scheme are of little concern | |
404 to us. */ | |
405 | |
406 #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun) | |
407 #warning "No usable lrint() and lrintf() functions available." | |
408 #warning "Replacing these functions with a simple C cast." | |
409 #endif | |
410 | |
/* Stand-in for lrint(): a plain C cast, so the value is truncated toward
   zero rather than rounded. */
static __inline__ long int lrint(double x)
{
    long int truncated = (long int) x;

    return truncated;
}
415 | |
/* Stand-in for lrintf(): a plain C cast, so the value is truncated toward
   zero rather than rounded. */
static __inline__ long int lrintf(float x)
{
    long int truncated = (long int) x;

    return truncated;
}
420 | |
/* Fast double to integer conversion: a plain C cast (truncates toward zero). */
static __inline__ long int lfastrint(double x)
{
    long int truncated = (long int) x;

    return truncated;
}
425 | |
/* Fast float to integer conversion: a plain C cast (truncates toward zero). */
static __inline__ long int lfastrintf(float x)
{
    long int truncated = (long int) x;

    return truncated;
}
430 #endif | |
431 | |
432 #if defined(__cplusplus) | |
433 } | |
434 #endif | |
435 | |
436 #endif | |
437 | |
438 /*- End of file ------------------------------------------------------------*/ |