comparison spandsp-0.0.6pre17/src/spandsp/fast_convert.h @ 4:26cd8f1ef0b1

import spandsp-0.0.6pre17
author Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date Fri, 25 Jun 2010 15:50:58 +0200
parents
children
comparison
equal deleted inserted replaced
3:c6c5a16ce2f2 4:26cd8f1ef0b1
1 /*
2 * SpanDSP - a series of DSP components for telephony
3 *
4 * fast_convert.h - Quick ways to convert floating point numbers to integers
5 *
6 * Written by Steve Underwood <steveu@coppice.org>
7 *
8 * Copyright (C) 2009 Steve Underwood
9 *
10 * All rights reserved.
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 2.1,
14 * as published by the Free Software Foundation.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * $Id: fast_convert.h,v 1.9 2009/10/03 04:37:25 steveu Exp $
26 */
27
28 #if !defined(_SPANDSP_FAST_CONVERT_H_)
29 #define _SPANDSP_FAST_CONVERT_H_
30
31 #if defined(__cplusplus)
32 extern "C"
33 {
34 #endif
35
36 /* The following code, to handle issues with lrint() and lrintf() on various
37 * platforms, is adapted from similar code in libsndfile, which is:
38 *
39 * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
40 *
41 * This program is free software; you can redistribute it and/or modify
42 * it under the terms of the GNU Lesser General Public License as published by
43 * the Free Software Foundation; either version 2.1 of the License, or
44 * (at your option) any later version.
45 *
46 * This program is distributed in the hope that it will be useful,
47 * but WITHOUT ANY WARRANTY; without even the implied warranty of
48 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
49 * GNU Lesser General Public License for more details.
50 */
51
52 /*
53 * On Intel Pentium processors (especially PIII and probably P4), converting
54 * from float to int is very slow. To meet the C specs, the code produced by
55 * most C compilers targeting Pentium needs to change the FPU rounding mode
56 * before the float to int conversion is performed.
57 *
58 * Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
59 * is this flushing of the pipeline which is so slow.
60 *
61 * Fortunately the ISO C99 specification defines the functions lrint, lrintf,
62 * llrint and llrintf which fix this problem as a side effect.
63 *
64 * On Unix-like systems, the configure process should have detected the
65 * presence of these functions. If they weren't found we have to replace them
66 * here with a standard C cast.
67 */
68
69 /*
70 * The C99 prototypes for these functions are as follows:
71 *
72 * float rintf(float x);
73 * double rint(double x);
74 * long int lrintf(float x);
75 * long int lrint(double x);
76 * long long int llrintf(float x);
77 * long long int llrint(double x);
78 *
79 * The presence of the required functions is detected during the configure
80 * process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
81 * the config file.
82 */
83
84 #if defined(__CYGWIN__)
85 #if !defined(__cplusplus)
86 /*
87 * CYGWIN has lrint and lrintf functions, but they are slow and buggy:
88 * http://sourceware.org/ml/cygwin/2005-06/msg00153.html
89 * http://sourceware.org/ml/cygwin/2005-09/msg00047.html
90 * The latest version of cygwin seems to have made no effort to fix this.
91 * These replacement functions (pulled from the Public Domain MinGW
92 * math.h header) replace the native versions.
93 */
/*
 * Replacement for Cygwin's lrint(): convert x to an integer with the x87
 * FISTP instruction, which rounds according to the FPU's current rounding
 * mode instead of truncating like a C cast.
 *
 * The result is limited to the 32-bit integer range, because that is all a
 * single "fistpl" can store.
 */
static __inline__ long int lrint(double x)
{
    /* "fistpl" writes exactly 32 bits, so the destination must be a 32-bit
       object. Storing straight into a long int would leave its upper half
       uninitialised wherever long int is 64 bits wide (LP64 targets). */
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"          /* fistp pops st(0); tell GCC the input is consumed */
    );

    return (long int) retval;
}
108
/*
 * Replacement for Cygwin's lrintf(): convert x to an integer with the x87
 * FISTP instruction, which rounds according to the FPU's current rounding
 * mode instead of truncating like a C cast.
 *
 * The result is limited to the 32-bit integer range, because that is all a
 * single "fistpl" can store.
 */
static __inline__ long int lrintf(float x)
{
    /* "fistpl" writes exactly 32 bits, so the destination must be a 32-bit
       object. Storing straight into a long int would leave its upper half
       uninitialised wherever long int is 64 bits wide (LP64 targets). */
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"          /* fistp pops st(0); tell GCC the input is consumed */
    );

    return (long int) retval;
}
122 #endif
123
/*
 * The fastest way to convert is the equivalent of lrint(): a single x87 FISTP,
 * which rounds according to the FPU's current rounding mode.
 *
 * The result is limited to the 32-bit integer range, because that is all a
 * single "fistpl" can store.
 */
static __inline__ long int lfastrint(double x)
{
    /* "fistpl" writes exactly 32 bits, so the destination must be a 32-bit
       object. Storing straight into a long int would leave its upper half
       uninitialised wherever long int is 64 bits wide (LP64 targets). */
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"          /* fistp pops st(0); tell GCC the input is consumed */
    );

    return (long int) retval;
}
139
/*
 * Single-precision counterpart of lfastrint(): a single x87 FISTP, which
 * rounds according to the FPU's current rounding mode.
 *
 * The result is limited to the 32-bit integer range, because that is all a
 * single "fistpl" can store.
 */
static __inline__ long int lfastrintf(float x)
{
    /* "fistpl" writes exactly 32 bits, so the destination must be a 32-bit
       object. Storing straight into a long int would leave its upper half
       uninitialised wherever long int is 64 bits wide (LP64 targets). */
    int retval;

    __asm__ __volatile__
    (
        "fistpl %0"
        : "=m" (retval)
        : "t" (x)
        : "st"          /* fistp pops st(0); tell GCC the input is consumed */
    );

    return (long int) retval;
}
153 #elif defined(__GNUC__) || (__SUNPRO_C >= 0x0590)
154
155 #if defined(__i386__)
156 /* These routines are guaranteed fast on an i386 machine. Using the built in
157 lrint() and lrintf() should be similar, but they may not always be enabled.
158 Sometimes, especially with "-O0", you might get slow calls to routines. */
/* i386: convert a double to a long int with one x87 store-and-pop. The value
   is passed in on the top of the FPU stack and written straight to memory. */
static __inline__ long int lfastrint(double x)
{
    long int rounded;

    /* "t" places x in st(0); the "st" clobber records that fistp pops it. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
173
/* i386: convert a float to a long int with one x87 store-and-pop. The value
   is passed in on the top of the FPU stack and written straight to memory. */
static __inline__ long int lfastrintf(float x)
{
    long int rounded;

    /* "t" places x in st(0); the "st" clobber records that fistp pops it. */
    __asm__ __volatile__ ("fistpl %0" : "=m" (rounded) : "t" (x) : "st");
    return rounded;
}
187 #elif defined(__x86_64__)
188 /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
189 double or float to an int. It looks like the design on the x86_64 took account
190 of the default behaviour specified for C. */
/* x86_64: a plain C conversion, which drops the fractional part of x. */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
195
/* x86_64: a plain C conversion, which drops the fractional part of x. */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
200 #elif defined(__ppc__) || defined(__powerpc__)
/* PowerPC (GNU C): runs fctiw on x in place, stores the floating point
   register to the two-int buffer res with stfd, and returns res[1].
   NOTE(review): the template appears to write operand %1, which is declared
   only as an input - confirm against GCC's extended asm rules.
   NOTE(review): returning res[1] presumably assumes the integer result lands
   in the second word of the stored doubleword (big-endian layout) - confirm
   before using on a little-endian PowerPC target. */
201 static __inline__ long int lfastrint(register double x)
202 {
203 int res[2];
204
205 __asm__ __volatile__
206 (
207 "fctiw %1, %1\n\t"
208 "stfd %1, %0"
209 : "=m" (res) /* Output */
210 : "f" (x) /* Input */
211 : "memory"
212 );
213
214 return res[1];
215 }
216
/* PowerPC (GNU C), float argument: runs fctiw on x in place, stores the
   floating point register to the two-int buffer res with stfd, and returns
   res[1].
   NOTE(review): the template appears to write operand %1, which is declared
   only as an input - confirm against GCC's extended asm rules.
   NOTE(review): returning res[1] presumably assumes the integer result lands
   in the second word of the stored doubleword (big-endian layout) - confirm
   before using on a little-endian PowerPC target. */
217 static __inline__ long int lfastrintf(register float x)
218 {
219 int res[2];
220
221 __asm__ __volatile__
222 (
223 "fctiw %1, %1\n\t"
224 "stfd %1, %0"
225 : "=m" (res) /* Output */
226 : "f" (x) /* Input */
227 : "memory"
228 );
229
230 return res[1];
231 }
232 #else
233 /* Fallback routines, for unrecognised platforms */
/* Unrecognised GNU C target: fall back to a plain C conversion, which drops
   the fractional part of x. */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
238
/* Unrecognised GNU C target: fall back to a plain C conversion, which drops
   the fractional part of x. */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
243 #endif
244
245 #elif defined(_M_IX86)
246 /* Visual Studio i386 */
247 /*
248 * Win32 doesn't seem to have the lrint() and lrintf() functions.
249 * Therefore implement inline versions of these functions here.
250 */
251
/* Visual Studio (32-bit x86) stand-in for lrint(): fld pushes x onto the x87
   stack, fistp stores it into i as an integer and pops the stack, and i is
   returned.
   NOTE(review): fistp presumably rounds according to the current FPU control
   word rather than truncating - confirm if the exact rounding matters. */
252 __inline long int lrint(double x)
253 {
254 long int i;
255
256 _asm
257 {
258 fld x
259 fistp i
260 };
261 return i;
262 }
263
/* Visual Studio (32-bit x86) stand-in for lrintf(): fld pushes x onto the x87
   stack, fistp stores it into i as an integer and pops the stack, and i is
   returned.
   NOTE(review): fistp presumably rounds according to the current FPU control
   word rather than truncating - confirm if the exact rounding matters. */
264 __inline long int lrintf(float x)
265 {
266 long int i;
267
268 _asm
269 {
270 fld x
271 fistp i
272 };
273 return i;
274 }
275
/* Visual Studio (32-bit x86) stand-in for rintf(): loads flt onto the x87
   stack and applies frndint to it.
   There is no return statement; the result is simply left on the x87 stack.
   NOTE(review): this presumably relies on 32-bit Visual C++ returning
   floating point values in ST(0) - confirm, and expect a "no return value"
   warning from the compiler. */
276 __inline float rintf(float flt)
277 {
278 _asm
279 { fld flt
280 frndint
281 }
282 }
283
/* Visual Studio (32-bit x86) stand-in for rint(): loads dbl onto the x87
   stack and applies frndint to it.
   There is no return statement; the result is simply left on the x87 stack.
   NOTE(review): this presumably relies on 32-bit Visual C++ returning
   floating point values in ST(0) - confirm, and expect a "no return value"
   warning from the compiler. */
284 __inline double rint(double dbl)
285 {
286 _asm
287 {
288 fld dbl
289 frndint
290 }
291 }
292
/* Visual Studio (32-bit x86) fast conversion: the same fld/fistp sequence as
   the lrint() defined for this compiler - x is pushed onto the x87 stack,
   stored into i as an integer, popped, and i is returned. */
293 __inline long int lfastrint(double x)
294 {
295 long int i;
296
297 _asm
298 {
299 fld x
300 fistp i
301 };
302 return i;
303 }
304
/* Visual Studio (32-bit x86) fast conversion: the same fld/fistp sequence as
   the lrintf() defined for this compiler - x is pushed onto the x87 stack,
   stored into i as an integer, popped, and i is returned. */
305 __inline long int lfastrintf(float x)
306 {
307 long int i;
308
309 _asm
310 {
311 fld x
312 fistp i
313 };
314 return i;
315 }
316 #elif defined(_M_X64)
317 /* Visual Studio x86_64 */
318 /* x86_64 machines will do best with a simple assignment. */
319 #include <intrin.h>
320
/*
 * Visual Studio (x64) stand-in for lrint(): convert x with the SSE2 scalar
 * double-to-integer conversion, which rounds according to the current SSE
 * rounding mode (round-to-nearest by default).
 *
 * Declared static, like the GNU C variants in this header, so that each
 * translation unit including the header gets its own private copy.
 */
static __inline long int lrint(double x)
{
    /* Load exactly one double with _mm_load_sd. _mm_loadu_pd must not be used
       here: it reads 16 bytes, i.e. 8 bytes beyond the 8-byte parameter. */
    return (long int) _mm_cvtsd_si64x(_mm_load_sd(&x));
}
325
/*
 * Visual Studio (x64) stand-in for lrintf(): convert x with the SSE scalar
 * float-to-integer conversion, which rounds according to the current SSE
 * rounding mode (round-to-nearest by default).
 *
 * Declared static, like the GNU C variants in this header, so that each
 * translation unit including the header gets its own private copy and no
 * external definition is required at link time.
 */
static __inline long int lrintf(float x)
{
    /* _mm_load_ss reads exactly one float, so the access stays inside x. */
    return _mm_cvt_ss2si(_mm_load_ss((const float *) &x));
}
330
/*
 * Visual Studio (x64) fast conversion: a plain C cast, which drops the
 * fractional part of x.
 *
 * Declared static, like the GNU C variants in this header, so that each
 * translation unit including the header gets its own private copy and no
 * external definition is required at link time.
 */
static __inline long int lfastrint(double x)
{
    return (long int) (x);
}
335
/*
 * Visual Studio (x64) fast conversion: a plain C cast, which drops the
 * fractional part of x.
 *
 * Declared static, like the GNU C variants in this header, so that each
 * translation unit including the header gets its own private copy and no
 * external definition is required at link time.
 */
static __inline long int lfastrintf(float x)
{
    return (long int) (x);
}
340 #elif defined(__MWERKS__) && defined(macintosh)
341 /* This MacOS 9 solution was provided by Stephane Letz */
342
/* Metrowerks / Mac OS 9: runs fctiw on x in place, stores the register into
   the two-element buffer res with stfd, and returns res[1].
   NOTE(review): res is declared as long int[2] while stfd presumably writes a
   single 8-byte doubleword; returning res[1] therefore assumes 32-bit long
   and a big-endian layout - confirm for the intended target. */
343 long int __inline__ lfastrint(register double x)
344 {
345 long int res[2];
346
347 asm
348 {
349 fctiw x, x
350 stfd x, res
351 }
352 return res[1];
353 }
354
/* Metrowerks / Mac OS 9, float argument: runs fctiw on x in place, stores the
   register into the two-element buffer res with stfd, and returns res[1].
   NOTE(review): res is declared as long int[2] while stfd presumably writes a
   single 8-byte doubleword; returning res[1] therefore assumes 32-bit long
   and a big-endian layout - confirm for the intended target. */
355 long int __inline__ lfastrintf(register float x)
356 {
357 long int res[2];
358
359 asm
360 {
361 fctiw x, x
362 stfd x, res
363 }
364 return res[1];
365 }
366 #elif defined(__MACH__) && defined(__APPLE__) && (defined(__ppc__) || defined(__powerpc__))
367 /* For Apple Mac OS/X - do recent versions still need this? */
368
/* Apple Mac OS X on PowerPC: runs fctiw on x in place, stores the floating
   point register to the two-int buffer res with stfd, and returns res[1].
   NOTE(review): the template appears to write operand %1, which is declared
   only as an input - confirm against GCC's extended asm rules.
   NOTE(review): returning res[1] presumably assumes the integer result lands
   in the second word of the stored doubleword (big-endian layout) - confirm. */
369 static __inline__ long int lfastrint(register double x)
370 {
371 int res[2];
372
373 __asm__ __volatile__
374 (
375 "fctiw %1, %1\n\t"
376 "stfd %1, %0"
377 : "=m" (res) /* Output */
378 : "f" (x) /* Input */
379 : "memory"
380 );
381
382 return res[1];
383 }
384
/* Apple Mac OS X on PowerPC, float argument: runs fctiw on x in place, stores
   the floating point register to the two-int buffer res with stfd, and
   returns res[1].
   NOTE(review): the template appears to write operand %1, which is declared
   only as an input - confirm against GCC's extended asm rules.
   NOTE(review): returning res[1] presumably assumes the integer result lands
   in the second word of the stored doubleword (big-endian layout) - confirm. */
385 static __inline__ long int lfastrintf(register float x)
386 {
387 int res[2];
388
389 __asm__ __volatile__
390 (
391 "fctiw %1, %1\n\t"
392 "stfd %1, %0"
393 : "=m" (res) /* Output */
394 : "f" (x) /* Input */
395 : "memory"
396 );
397
398 return res[1];
399 }
400 #else
401 /* There is nothing else to do, but use a simple casting operation, instead of a real
402 rint() type function. Since we are only trying to use rint() to speed up conversions,
403 the accuracy issues related to changing the rounding scheme are of little concern
404 to us. */
405
406 #if !defined(__sgi) && !defined(__sunos) && !defined(__solaris) && !defined(__sun)
407 #warning "No usable lrint() and lrintf() functions available."
408 #warning "Replacing these functions with a simple C cast."
409 #endif
410
/* Last-resort lrint() substitute: a plain C conversion, so the fractional
   part of x is dropped rather than rounded. */
static __inline__ long int lrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
415
/* Last-resort lrintf() substitute: a plain C conversion, so the fractional
   part of x is dropped rather than rounded. */
static __inline__ long int lrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
420
/* Last-resort fast conversion: a plain C conversion, so the fractional part
   of x is dropped rather than rounded. */
static __inline__ long int lfastrint(double x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
425
/* Last-resort fast conversion: a plain C conversion, so the fractional part
   of x is dropped rather than rounded. */
static __inline__ long int lfastrintf(float x)
{
    long int whole;

    whole = (long int) x;
    return whole;
}
430 #endif
431
432 #if defined(__cplusplus)
433 }
434 #endif
435
436 #endif
437
438 /*- End of file ------------------------------------------------------------*/

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.