diff spandsp-0.0.6pre17/src/spandsp/fast_convert.h @ 4:26cd8f1ef0b1

import spandsp-0.0.6pre17
author Peter Meerwald <pmeerw@cosy.sbg.ac.at>
date Fri, 25 Jun 2010 15:50:58 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spandsp-0.0.6pre17/src/spandsp/fast_convert.h	Fri Jun 25 15:50:58 2010 +0200
@@ -0,0 +1,438 @@
+/*
+ * SpanDSP - a series of DSP components for telephony
+ *
+ * fast_convert.h - Quick ways to convert floating point numbers to integers
+ *
+ * Written by Steve Underwood <steveu@coppice.org>
+ *
+ * Copyright (C) 2009 Steve Underwood
+ *
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * $Id: fast_convert.h,v 1.9 2009/10/03 04:37:25 steveu Exp $
+ */
+
+#if !defined(_SPANDSP_FAST_CONVERT_H_)
+#define _SPANDSP_FAST_CONVERT_H_
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+/* The following code, to handle issues with lrint() and lrintf() on various
+ * platforms, is adapted from similar code in libsndfile, which is:
+ *
+ * Copyright (C) 2001-2004 Erik de Castro Lopo <erikd@mega-nerd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ */
+
+/*
+ *    On Intel Pentium processors (especially PIII and probably P4), converting
+ *    from float to int is very slow. To meet the C specs, the code produced by
+ *    most C compilers targeting Pentium needs to change the FPU rounding mode
+ *    before the float to int conversion is performed.
+ *
+ *    Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
+ *    is this flushing of the pipeline which is so slow.
+ *
+ *    Fortunately the ISO C99 specification defines the functions lrint, lrintf,
+ *    llrint and llrintf which fix this problem as a side effect.
+ *
+ *    On Unix-like systems, the configure process should have detected the
+ *    presence of these functions. If they weren't found we have to replace them
+ *    here with a standard C cast.
+ */
+
+/*
+ *    The C99 prototypes for these functions are as follows:
+ *
+ *        float rintf(float x);
+ *        double rint(double x);
+ *        long int lrintf(float x);
+ *        long int lrint(double x);
+ *        long long int llrintf(float x);
+ *        long long int llrint(double x);
+ *
+ *    The presence of the required functions is detected during the configure
+ *    process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in
+ *    the config file.
+ */
+
+#if defined(__CYGWIN__)
+#if !defined(__cplusplus)
+    /*
+     *    CYGWIN has lrint and lrintf functions, but they are slow and buggy:
+     *        http://sourceware.org/ml/cygwin/2005-06/msg00153.html
+     *        http://sourceware.org/ml/cygwin/2005-09/msg00047.html
+     *    The latest version of cygwin seems to have made no effort to fix this.
+     *    These replacement functions (pulled from the Public Domain MinGW
+     *    math.h header) replace the native versions.
+     */
+    static __inline__ long int lrint(double x)
+    {   /* Replacement lrint(): converts x to an integer with a single x87
+         * store, so rounding follows the FPU's current rounding mode. */
+        long int retval;
+
+        __asm__ __volatile__
+        (
+            "fistpl %0"         /* store the x87 stack top as an integer, then pop */
+            : "=m" (retval)     /* output: written straight to memory */
+            : "t" (x)           /* input: x on the top of the x87 stack */
+            : "st"              /* the pop consumes the stack top */
+        );
+        /* NOTE(review): "fistpl" stores a 32-bit integer; presumably long int is
+         * 32 bits on every target that reaches this code -- confirm. */
+        return retval;
+    }
+
+    static __inline__ long int lrintf(float x)
+    {   /* Replacement lrintf(): converts x to an integer with a single x87
+         * store, so rounding follows the FPU's current rounding mode. */
+        long int retval;
+        /* NOTE(review): "fistpl" stores a 32-bit integer; presumably long int is
+         * 32 bits on every target that reaches this code -- confirm. */
+        __asm__ __volatile__
+        (
+            "fistpl %0"         /* store the x87 stack top as an integer, then pop */
+            : "=m" (retval)     /* output: written straight to memory */
+            : "t" (x)           /* input: x on the top of the x87 stack */
+            : "st"              /* the pop consumes the stack top */
+        );
+        return retval;
+    }
+#endif
+
+    /* The fastest way to convert is the equivalent of lrint() */
+    static __inline__ long int lfastrint(double x)
+    {   /* Fast double to integer conversion: a single x87 store, rounding
+         * according to the FPU's current rounding mode. */
+        long int retval;
+
+        __asm__ __volatile__
+        (
+            "fistpl %0"         /* store the x87 stack top as an integer, then pop */
+            : "=m" (retval)     /* output: written straight to memory */
+            : "t" (x)           /* input: x on the top of the x87 stack */
+            : "st"              /* the pop consumes the stack top */
+        );
+        /* NOTE(review): "fistpl" stores a 32-bit integer; presumably long int is
+         * 32 bits on every target that reaches this code -- confirm. */
+        return retval;
+    }
+
+    static __inline__ long int lfastrintf(float x)
+    {   /* Fast float to integer conversion: a single x87 store, rounding
+         * according to the FPU's current rounding mode. */
+        long int retval;
+        /* NOTE(review): "fistpl" stores a 32-bit integer; presumably long int is
+         * 32 bits on every target that reaches this code -- confirm. */
+        __asm__ __volatile__
+        (
+            "fistpl %0"         /* store the x87 stack top as an integer, then pop */
+            : "=m" (retval)     /* output: written straight to memory */
+            : "t" (x)           /* input: x on the top of the x87 stack */
+            : "st"              /* the pop consumes the stack top */
+        );
+        return retval;
+    }
+#elif defined(__GNUC__)  ||  (__SUNPRO_C >= 0x0590)
+
+#if defined(__i386__)
+    /* These routines are guaranteed fast on an i386 machine. Using the built in
+       lrint() and lrintf() should be similar, but they may not always be enabled.
+       Sometimes, especially with "-O0", you might get slow calls to routines. */
+    static __inline__ long int lfastrint(double x)
+    {   /* i386 version: converts x to an integer with a single x87 store,
+         * rounding according to the FPU's current rounding mode. */
+        long int retval;
+
+        __asm__ __volatile__
+        (
+            "fistpl %0"         /* store the x87 stack top as a 32-bit integer, then pop */
+            : "=m" (retval)     /* output: written straight to memory */
+            : "t" (x)           /* input: x on the top of the x87 stack */
+            : "st"              /* the pop consumes the stack top */
+        );
+
+        return retval;
+    }
+
+    static __inline__ long int lfastrintf(float x)
+    {   /* i386 version: converts x to an integer with a single x87 store,
+         * rounding according to the FPU's current rounding mode. */
+        long int retval;
+
+        __asm__ __volatile__
+        (
+            "fistpl %0"         /* store the x87 stack top as a 32-bit integer, then pop */
+            : "=m" (retval)     /* output: written straight to memory */
+            : "t" (x)           /* input: x on the top of the x87 stack */
+            : "st"              /* the pop consumes the stack top */
+        );
+        return retval;
+    }
+#elif defined(__x86_64__)
+    /* On an x86_64 machine, the fastest thing seems to be a pure assignment from a
+       double or float to an int. It looks like the design on the x86_64 took account
+       of the default behaviour specified for C. */
+    static __inline__ long int lfastrint(double x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+
+    static __inline__ long int lfastrintf(float x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+#elif defined(__ppc__)  ||   defined(__powerpc__)
+    static __inline__ long int lfastrint(register double x)
+    {
+        /* PowerPC version: "fctiw" converts x to a 32-bit integer inside its
+         * floating point register, and "stfd" stores the whole 64-bit register
+         * to res, from where the integer word is picked up. */
+        int res[2];
+
+        __asm__ __volatile__
+        (
+            "fctiw %1, %1\n\t"
+            "stfd %1, %0"
+            : "=m" (res),   /* Output: the stored register image */
+              "+f" (x)      /* In/out: fctiw overwrites the register holding x,
+                               so it must not be declared as an input-only operand */
+            :
+            : "memory"
+        );
+
+        /* NOTE(review): taking res[1] as the integer word assumes a big-endian
+         * memory layout -- confirm before using on little-endian PowerPC. */
+        return res[1];
+    }
+
+    static __inline__ long int lfastrintf(register float x)
+    {
+        /* PowerPC version: "fctiw" converts x to a 32-bit integer inside its
+         * floating point register, and "stfd" stores the whole 64-bit register
+         * to res, from where the integer word is picked up. */
+        int res[2];
+
+        __asm__ __volatile__
+        (
+            "fctiw %1, %1\n\t"
+            "stfd %1, %0"
+            : "=m" (res),   /* Output: the stored register image */
+              "+f" (x)      /* In/out: fctiw overwrites the register holding x,
+                               so it must not be declared as an input-only operand */
+            :
+            : "memory"
+        );
+
+        /* NOTE(review): taking res[1] as the integer word assumes a big-endian
+         * memory layout -- confirm before using on little-endian PowerPC. */
+        return res[1];
+    }
+#else
+    /* Fallback routines, for unrecognised platforms */
+    static __inline__ long int lfastrint(double x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) x;
+    }
+
+    static __inline__ long int lfastrintf(float x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) x;
+    }
+#endif
+
+#elif defined(_M_IX86)
+    /* Visual Studio i386 */
+    /*
+     *    Win32 doesn't seem to have the lrint() and lrintf() functions.
+     *    Therefore implement inline versions of these functions here.
+     */
+
+    __inline long int lrint(double x)
+    {   /* Inline lrint() for 32-bit Visual Studio builds. */
+        long int i;
+        /* "fld" pushes x on the x87 stack; "fistp" stores the stack top to i as
+         * an integer, using the FPU's current rounding mode, and pops it. */
+        _asm
+        {
+            fld x
+            fistp i
+        };
+        return i;
+    }
+
+    __inline long int lrintf(float x)
+    {   /* Inline lrintf() for 32-bit Visual Studio builds. */
+        long int i;
+        /* "fld" pushes x on the x87 stack; "fistp" stores the stack top to i as
+         * an integer, using the FPU's current rounding mode, and pops it. */
+        _asm
+        {
+            fld x
+            fistp i
+        };
+        return i;
+    }
+
+    __inline float rintf(float flt)
+    {   /* Rounds flt to an integral value, still as a float: "fld" pushes flt
+         * on the x87 stack and "frndint" rounds the stack top in place.
+         * NOTE(review): there is no return statement; presumably this relies on
+         * the x87 stack top being the floating point return register on 32-bit
+         * x86 -- confirm. */
+    	_asm
+    	{	fld flt
+    		frndint
+    	}
+    }
+
+    __inline double rint(double dbl)
+    {   /* Rounds dbl to an integral value, still as a double: "fld" pushes dbl
+         * on the x87 stack and "frndint" rounds the stack top in place.
+         * NOTE(review): there is no return statement; presumably this relies on
+         * the x87 stack top being the floating point return register on 32-bit
+         * x86 -- confirm. */
+        _asm 
+    	{
+            fld dbl
+            frndint
+        }
+    }
+
+    __inline long int lfastrint(double x)
+    {   /* Fast double to integer conversion for 32-bit Visual Studio builds. */
+        long int i;
+        /* "fld" pushes x on the x87 stack; "fistp" stores the stack top to i as
+         * an integer, using the FPU's current rounding mode, and pops it. */
+        _asm
+        {
+            fld x
+            fistp i
+        };
+        return i;
+    }
+
+    __inline long int lfastrintf(float x)
+    {   /* Fast float to integer conversion for 32-bit Visual Studio builds. */
+        long int i;
+        /* "fld" pushes x on the x87 stack; "fistp" stores the stack top to i as
+         * an integer, using the FPU's current rounding mode, and pops it. */
+        _asm
+        {
+            fld x
+            fistp i
+        };
+        return i;
+    }
+#elif defined(_M_X64)
+    /* Visual Studio x86_64 */
+    /* x86_64 machines will do best with a simple assignment. */
+#include <intrin.h>
+
+    __inline long int lrint(double x)
+    {
+        /* Inline lrint() for 64-bit Visual Studio builds, using the SSE2
+         * double to integer conversion intrinsic.
+         * _mm_load_sd() reads exactly one double. The unaligned packed load
+         * used previously reads two doubles (16 bytes), which runs past the
+         * end of the single 8-byte parameter x. */
+		return (long int)_mm_cvtsd_si64x( _mm_load_sd((const double*)&x) );
+    }
+
+    __inline long int lrintf(float x)
+    {   /* Inline lrintf() for 64-bit Visual Studio builds: loads x into an
+         * SSE register with _mm_load_ss() and converts it to an integer with
+         * _mm_cvt_ss2si(). */
+		return _mm_cvt_ss2si( _mm_load_ss((const float*)&x) );
+    }
+
+    __inline long int lfastrint(double x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+
+    __inline long int lfastrintf(float x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+#elif defined(__MWERKS__)  &&  defined(macintosh)
+    /* This MacOS 9 solution was provided by Stephane Letz */
+
+    long int __inline__ lfastrint(register double x)
+    {   /* Metrowerks / classic Mac OS version of the fast conversion. */
+        long int res[2];
+        /* "fctiw" converts x to an integer word in its floating point register;
+         * "stfd" stores that 64-bit register to res. NOTE(review): returning
+         * res[1] presumably assumes 32-bit long int and a big-endian layout --
+         * confirm. */
+        asm
+        {
+            fctiw x, x
+            stfd x, res
+        }
+        return res[1];
+    }
+
+    long int __inline__ lfastrintf(register float x)
+    {   /* Metrowerks / classic Mac OS version of the fast conversion. */
+        long int res[2];
+        /* "fctiw" converts x to an integer word in its floating point register;
+         * "stfd" stores that 64-bit register to res. NOTE(review): returning
+         * res[1] presumably assumes 32-bit long int and a big-endian layout --
+         * confirm. */
+        asm
+        {
+            fctiw x, x
+            stfd x, res
+        }
+        return res[1];
+    }
+#elif defined(__MACH__)  &&  defined(__APPLE__)  &&  (defined(__ppc__)  ||  defined(__powerpc__))
+    /* For Apple Mac OS/X - do recent versions still need this? */
+
+    static __inline__ long int lfastrint(register double x)
+    {
+        /* PowerPC version: "fctiw" converts x to a 32-bit integer inside its
+         * floating point register, and "stfd" stores the whole 64-bit register
+         * to res, from where the integer word is picked up. */
+        int res[2];
+
+        __asm__ __volatile__
+        (
+            "fctiw %1, %1\n\t"
+            "stfd %1, %0"
+            : "=m" (res),   /* Output: the stored register image */
+              "+f" (x)      /* In/out: fctiw overwrites the register holding x,
+                               so it must not be declared as an input-only operand */
+            :
+            : "memory"
+        );
+
+        /* NOTE(review): taking res[1] as the integer word assumes a big-endian
+         * memory layout -- confirm. */
+        return res[1];
+    }
+
+    static __inline__ long int lfastrintf(register float x)
+    {
+        /* PowerPC version: "fctiw" converts x to a 32-bit integer inside its
+         * floating point register, and "stfd" stores the whole 64-bit register
+         * to res, from where the integer word is picked up. */
+        int res[2];
+
+        __asm__ __volatile__
+        (
+            "fctiw %1, %1\n\t"
+            "stfd %1, %0"
+            : "=m" (res),   /* Output: the stored register image */
+              "+f" (x)      /* In/out: fctiw overwrites the register holding x,
+                               so it must not be declared as an input-only operand */
+            :
+            : "memory"
+        );
+
+        /* NOTE(review): taking res[1] as the integer word assumes a big-endian
+         * memory layout -- confirm. */
+        return res[1];
+    }
+#else
+    /* There is nothing else to do, but use a simple casting operation, instead of a real
+       rint() type function. Since we are only trying to use rint() to speed up conversions,
+       the accuracy issues related to changing the rounding scheme are of little concern
+       to us. */
+
+    #if !defined(__sgi)  &&  !defined(__sunos)  &&  !defined(__solaris)  &&  !defined(__sun)
+        #warning "No usable lrint() and lrintf() functions available."
+        #warning "Replacing these functions with a simple C cast."
+    #endif
+
+    static __inline__ long int lrint(double x)
+    {   /* Stand-in for lrint(): a plain C conversion, so the fractional part
+         * of x is discarded (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+
+    static __inline__ long int lrintf(float x)
+    {   /* Stand-in for lrintf(): a plain C conversion, so the fractional part
+         * of x is discarded (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+
+    static __inline__ long int lfastrint(double x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+
+    static __inline__ long int lfastrintf(float x)
+    {   /* Plain C conversion: the fractional part of x is discarded
+         * (truncation toward zero) rather than rounded. */
+        return (long int) (x);
+    }
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
+
+/*- End of file ------------------------------------------------------------*/

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.