Mercurial > hg > peckfft

--- a/_peck_fft_guts.h	Fri Sep 16 15:08:29 2011 +0200
+++ b/_peck_fft_guts.h	Fri Sep 16 15:18:28 2011 +0200
@@ -33,102 +33,46 @@
 };

 /*
-  Explanation of macros dealing with complex math:
-
-   C_MUL(m,a,b)         : m = a*b
-   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
-   C_SUB( res, a,b)     : res = a - b
-   C_SUBFROM( res , a)  : res -= a
-   C_ADDTO( res , a)    : res += a
- * */
-#ifdef FIXED_POINT
-#if (FIXED_POINT==32)
-# define FRACBITS 31
-# define SAMPPROD int64_t
-#define SAMP_MAX 2147483647
-#else
-# define FRACBITS 15
-# define SAMPPROD int32_t
-#define SAMP_MAX 32767
-#endif
-
-#define SAMP_MIN -SAMP_MAX
-
-#if defined(CHECK_OVERFLOW)
-#  define CHECK_OVERFLOW_OP(a,op,b)  \
-	if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
-		fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) );  }
-#endif
-
-
-#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
-#   define sround( x )  (peck_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
-
-#   define S_MUL(a,b) sround( smul(a,b) )
-
-#   define C_MUL(m,a,b) \
-      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
-          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
-
-#   define DIVSCALAR(x,k) \
-	(x) = sround( smul(  x, SAMP_MAX/k ) )
-
-#   define C_FIXDIV(c,div) \
-	do {    DIVSCALAR( (c).r , div);  \
-		DIVSCALAR( (c).i  , div); }while (0)
-
-#   define C_MULBYSCALAR( c, s ) \
-    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
-        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
+ * Explanation of macros dealing with complex math:
+ *   C_MUL(m,a,b)         : m = a*b
+ *   C_SUB( res, a,b)     : res = a - b
+ *   C_SUBFROM( res , a)  : res -= a
+ *   C_ADDTO( res , a)    : res += a
+ */

-#else  /* not FIXED_POINT*/
-
-#   define S_MUL(a,b) ( (a)*(b) )
-#define C_MUL(m,a,b) \
-    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
-        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
-#   define C_FIXDIV(c,div) /* NOOP */
-#   define C_MULBYSCALAR( c, s ) \
-    do{ (c).r *= (s);\
-        (c).i *= (s); }while(0)
-#endif
-
-#ifndef CHECK_OVERFLOW_OP
-#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
-#endif
-
-#define  C_ADD( res, a,b)\
+#define S_MUL(a, b) ((a) * (b))
+#define C_MUL(m, a, b) \
     do { \
-	    CHECK_OVERFLOW_OP((a).r,+,(b).r)\
-	    CHECK_OVERFLOW_OP((a).i,+,(b).i)\
-	    (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
-    }while(0)
-#define  C_SUB( res, a,b)\
+        (m).r = (a).r*(b).r - (a).i*(b).i; \
+        (m).i = (a).r*(b).i + (a).i*(b).r; \
+    } while(0)
+#define C_MULBYSCALAR(c, s ) \
+    do { \
+        (c).r *= (s); \
+        (c).i *= (s); \
+    } while(0)
+#define  C_ADD(res, a, b) \
     do { \
-	    CHECK_OVERFLOW_OP((a).r,-,(b).r)\
-	    CHECK_OVERFLOW_OP((a).i,-,(b).i)\
-	    (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
-    }while(0)
-#define C_ADDTO( res , a)\
+	    (res).r = (a).r + (b).r; \
+	    (res).i = (a).i + (b).i; \
+    } while(0)
+#define  C_SUB(res, a, b) \
     do { \
-	    CHECK_OVERFLOW_OP((res).r,+,(a).r)\
-	    CHECK_OVERFLOW_OP((res).i,+,(a).i)\
-	    (res).r += (a).r;  (res).i += (a).i;\
-    }while(0)
+	    (res).r = (a).r - (b).r; \
+	    (res).i = (a).i - (b).i; \
+    } while(0)
+#define C_ADDTO(res, a) \
+    do { \
+	    (res).r += (a).r; \
+	    (res).i += (a).i; \
+    } while(0)
+#define C_SUBFROM(res, a) \
+    do { \
+	    (res).r -= (a).r; \
+	    (res).i -= (a).i; \
+    } while(0)

-#define C_SUBFROM( res , a)\
-    do {\
-	    CHECK_OVERFLOW_OP((res).r,-,(a).r)\
-	    CHECK_OVERFLOW_OP((res).i,-,(a).i)\
-	    (res).r -= (a).r;  (res).i -= (a).i; \
-    }while(0)
-
-
-#ifdef FIXED_POINT
-    #define PECK_FFT_COS(phase) floorf(0.5f+SAMP_MAX * cosf(phase))
-    #define PECK_FFT_SIN(phase) floorf(0.5f+SAMP_MAX * sinf(phase))
-    #define HALF_OF(x) ((x)>>1)
-#elif USE_SIMD == SIMD_SSE2
+#if USE_SIMD == SIMD_SSE2
     #define PECK_FFT_COS(phase) _mm_set1_ps(cosf(phase))
     #define PECK_FFT_SIN(phase) _mm_set1_ps(sinf(phase))
     #define HALF_OF(x) ((x)*_mm_set1_ps(0.5f))
@@ -147,16 +91,10 @@
 #endif

 #define  kf_cexp(x,phase) \
-	do{ \
-		(x)->r = PECK_FFT_COS(phase);\
-		(x)->i = PECK_FFT_SIN(phase);\
-	}while(0)
-
-
-/* a debugging function */
-#define pcpx(c)\
-    fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
-
+	do { \
+		(x)->r = PECK_FFT_COS(phase); \
+		(x)->i = PECK_FFT_SIN(phase); \
+	} while(0)

 #ifdef PECK_FFT_USE_ALLOCA
 // define this to allow use of alloca instead of malloc for temporary buffers
--- a/peck_fft.c	Fri Sep 16 15:08:29 2011 +0200
+++ b/peck_fft.c	Fri Sep 16 15:18:28 2011 +0200
@@ -31,9 +31,6 @@
     peck_fft_cpx t;
     Fout2 = Fout + m;
     do {
-        C_FIXDIV(*Fout, 2);
-        C_FIXDIV(*Fout2, 2);
-
         C_MUL(t, *Fout2, *tw1);
         tw1 += fstride;
         C_SUB(*Fout2, *Fout, t);
@@ -60,11 +57,6 @@
     tw3 = tw2 = tw1 = st->twiddles;

     do {
-        C_FIXDIV(*Fout, 4);
-        C_FIXDIV(Fout[m], 4);
-        C_FIXDIV(Fout[m2], 4);
-        C_FIXDIV(Fout[m3], 4);
-
         C_MUL(scratch[0], Fout[m], *tw1);
         C_MUL(scratch[1], Fout[m2], *tw2);
         C_MUL(scratch[2], Fout[m3], *tw3);
@@ -112,8 +104,6 @@
      tw1 = tw2 = st->twiddles;

      do {
-         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
-
          C_MUL(scratch[1],Fout[m] , *tw1);
          C_MUL(scratch[2],Fout[m2] , *tw2);

@@ -164,18 +154,17 @@

     tw=st->twiddles;
     for (u = 0; u < m; ++u) {
-        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
         scratch[0] = *Fout0;

-        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
-        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
-        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
-        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+        C_MUL(scratch[1], *Fout1, tw[u*fstride]);
+        C_MUL(scratch[2], *Fout2, tw[2*u*fstride]);
+        C_MUL(scratch[3], *Fout3, tw[3*u*fstride]);
+        C_MUL(scratch[4], *Fout4, tw[4*u*fstride]);

-        C_ADD( scratch[7],scratch[1],scratch[4]);
-        C_SUB( scratch[10],scratch[1],scratch[4]);
-        C_ADD( scratch[8],scratch[2],scratch[3]);
-        C_SUB( scratch[9],scratch[2],scratch[3]);
+        C_ADD(scratch[7], scratch[1], scratch[4]);
+        C_SUB(scratch[10], scratch[1], scratch[4]);
+        C_ADD(scratch[8], scratch[2], scratch[3]);
+        C_SUB(scratch[9], scratch[2], scratch[3]);

         Fout0->r += scratch[7].r + scratch[8].r;
         Fout0->i += scratch[7].i + scratch[8].i;
@@ -222,7 +211,6 @@
         k=u;
         for (q1 = 0; q1 < p; ++q1) {
             scratch[q1] = Fout[k];
-            C_FIXDIV(scratch[q1], p);
             k += m;
         }
--- a/peck_fftr.c	Fri Sep 16 15:08:29 2011 +0200
+++ b/peck_fftr.c	Fri Sep 16 15:18:28 2011 +0200
@@ -90,9 +90,6 @@

     tdc.r = st->tmpbuf[0].r;
     tdc.i = st->tmpbuf[0].i;
-    C_FIXDIV(tdc,2);
-    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
-    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
     freqdata[0].r = tdc.r + tdc.i;
     freqdata[ncfft].r = tdc.r - tdc.i;
 #if USE_SIMD == SIMD_SSE2
@@ -109,8 +106,6 @@
         fpk    = st->tmpbuf[k];
         fpnk.r = st->tmpbuf[ncfft-k].r;
         fpnk.i = - st->tmpbuf[ncfft-k].i;
-        C_FIXDIV(fpk, 2);
-        C_FIXDIV(fpnk, 2);

         C_ADD(f1k, fpk, fpnk);
         C_SUB(f2k, fpk, fpnk);
@@ -136,15 +131,12 @@

     st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
     st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
-    C_FIXDIV(st->tmpbuf[0], 2);

     for (k = 1; k <= ncfft / 2; ++k) {
         peck_fft_cpx fk, fnkc, fek, fok, tmp;
         fk = freqdata[k];
         fnkc.r = freqdata[ncfft - k].r;
         fnkc.i = -freqdata[ncfft - k].i;
-        C_FIXDIV(fk, 2);
-        C_FIXDIV(fnkc, 2);

         C_ADD(fek, fk, fnkc);
         C_SUB(tmp, fk, fnkc);