annotate peck_fftr.c @ 4:2d6c49fcafcb

neon2 and neon4 support
author Peter Meerwald <p.meerwald@bct-electronic.com>
date Fri, 16 Sep 2011 14:04:19 +0200
parents 3b31bd44a09f
children fee54f1878f7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
1 /*
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
2 Copyright (c) 2003-2004, Mark Borgerding
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
3
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
4 All rights reserved.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
5
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
7
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
11
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
13 */
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
14
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
15 #include "peck_fftr.h"
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
16 #include "_peck_fft_guts.h"
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
17
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
18 struct peck_fftr_state{
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
19 peck_fft_cfg substate;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
20 peck_fft_cpx *tmpbuf;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
21 peck_fft_cpx *super_twiddles;
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
22 #if USE_SIMD == SIMD_SSE2
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
23 void *pad;
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
24 #endif
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
25 };
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
26
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
27 peck_fftr_cfg peck_fftr_alloc(int nfft, int inverse_fft, void *mem, size_t *lenmem) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
28 int i;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
29 peck_fftr_cfg st = NULL;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
30 size_t subsize, memneeded;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
31
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
32 if (nfft & 1) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
33 fprintf(stderr, "Real FFT must be even.\n");
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
34 return NULL;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
35 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
36 nfft >>= 1;
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
37 peck_fft_alloc(nfft, inverse_fft, NULL, &subsize);
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
38
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
39 memneeded = sizeof(struct peck_fftr_state) + subsize + sizeof(peck_fft_cpx) * (nfft * 3 / 2);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
40 if (lenmem == NULL) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
41 st = (peck_fftr_cfg) PECK_FFT_MALLOC(memneeded);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
42 } else {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
43 if (*lenmem >= memneeded)
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
44 st = (peck_fftr_cfg) mem;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
45 *lenmem = memneeded;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
46 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
47 if (!st)
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
48 return NULL;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
49
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
50 st->substate = (peck_fft_cfg) (st + 1); /* just beyond peck_fftr_state struct */
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
51 st->tmpbuf = (peck_fft_cpx *) (((char *) st->substate) + subsize);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
52 st->super_twiddles = st->tmpbuf + nfft;
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
53
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
54 peck_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
55
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
56 for (i = 0; i < nfft/2; ++i) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
57 float phase =
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
58 -3.14159265359f * ((float) (i+1) / nfft + 0.5f);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
59 if (inverse_fft)
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
60 phase *= -1;
3
3b31bd44a09f cleanup
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 0
diff changeset
61 kf_cexp(st->super_twiddles+i, phase);
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
62 }
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
63
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
64 return st;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
65 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
66
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
67 void peck_fftr(peck_fftr_cfg st, const peck_fft_scalar *timedata, peck_fft_cpx *freqdata) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
68 /* Input buffer timedata is stored row-wise */
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
69 int k, ncfft;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
70 peck_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
71
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
72 if (st->substate->inverse) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
73 fprintf(stderr, "peck_fft usage error: improper alloc\n");
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
74 exit(EXIT_FAILURE);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
75 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
76
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
77 ncfft = st->substate->nfft;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
78
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
79 /* Perform the parallel FFT of two real signals packed in real,imag */
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
80 peck_fft(st->substate, (const peck_fft_cpx*)timedata, st->tmpbuf);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
81 /* The real part of the DC element of the frequency spectrum in st->tmpbuf
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
82 * contains the sum of the even-numbered elements of the input time sequence.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
83 * The imag part is the sum of the odd-numbered elements.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
84 *
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
85 * The sum of tdc.r and tdc.i is the sum of the input time sequence,
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
86 * yielding DC of the input time sequence.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
87 * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1,...
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
88 * yielding the Nyquist bin of input time sequence.
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
89 */
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
90
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
91 tdc.r = st->tmpbuf[0].r;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
92 tdc.i = st->tmpbuf[0].i;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
93 C_FIXDIV(tdc,2);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
94 CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
95 CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
96 freqdata[0].r = tdc.r + tdc.i;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
97 freqdata[ncfft].r = tdc.r - tdc.i;
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
98 #if USE_SIMD == SIMD_SSE2
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
99 freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
100 #elif USE_SIMD == SIMD_NEON4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
101 freqdata[ncfft].i = freqdata[0].i = vdupq_n_f32(0.0f);
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
102 #elif USE_SIMD == SIMD_NEON2
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
103 freqdata[ncfft].i = freqdata[0].i = vdup_n_f32(0.0f);
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
104 #else
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
105 freqdata[ncfft].i = freqdata[0].i = 0;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
106 #endif
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
107
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
108 for (k = 1; k <= ncfft/2; ++k) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
109 fpk = st->tmpbuf[k];
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
110 fpnk.r = st->tmpbuf[ncfft-k].r;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
111 fpnk.i = - st->tmpbuf[ncfft-k].i;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
112 C_FIXDIV(fpk, 2);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
113 C_FIXDIV(fpnk, 2);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
114
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
115 C_ADD(f1k, fpk, fpnk);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
116 C_SUB(f2k, fpk, fpnk);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
117 C_MUL(tw, f2k, st->super_twiddles[k-1]);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
118
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
119 freqdata[k].r = HALF_OF(f1k.r + tw.r);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
120 freqdata[k].i = HALF_OF(f1k.i + tw.i);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
121 freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
122 freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
123 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
124 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
125
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
126 void peck_fftri(peck_fftr_cfg st,const peck_fft_cpx *freqdata,peck_fft_scalar *timedata) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
127 /* input buffer timedata is stored row-wise */
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
128 int k, ncfft;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
129
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
130 if (st->substate->inverse == 0) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
131 fprintf (stderr, "peck_fft usage error: improper alloc\n");
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
132 exit(EXIT_FAILURE);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
133 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
134
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
135 ncfft = st->substate->nfft;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
136
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
137 st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
138 st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
139 C_FIXDIV(st->tmpbuf[0], 2);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
140
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
141 for (k = 1; k <= ncfft / 2; ++k) {
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
142 peck_fft_cpx fk, fnkc, fek, fok, tmp;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
143 fk = freqdata[k];
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
144 fnkc.r = freqdata[ncfft - k].r;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
145 fnkc.i = -freqdata[ncfft - k].i;
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
146 C_FIXDIV(fk, 2);
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
147 C_FIXDIV(fnkc, 2);
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
148
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
149 C_ADD(fek, fk, fnkc);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
150 C_SUB(tmp, fk, fnkc);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
151 C_MUL(fok, tmp, st->super_twiddles[k-1]);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
152 C_ADD(st->tmpbuf[k], fek, fok);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
153 C_SUB(st->tmpbuf[ncfft - k], fek, fok);
4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
154 #if USE_SIMD == SIMD_SSE2
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
155 st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0f);
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
156 #elif USE_SIMD == SIMD_NEON4
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
157 st->tmpbuf[ncfft - k].i *= vdupq_n_f32(-1.0f);
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
158 #elif USE_SIMD == SIMD_NEON2
2d6c49fcafcb neon2 and neon4 support
Peter Meerwald <p.meerwald@bct-electronic.com>
parents: 3
diff changeset
159 st->tmpbuf[ncfft - k].i *= vdup_n_f32(-1.0f);
0
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
160 #else
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
161 st->tmpbuf[ncfft - k].i *= -1;
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
162 #endif
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
163 }
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
164 peck_fft(st->substate, st->tmpbuf, (peck_fft_cpx *) timedata);
Peter Meerwald <p.meerwald@bct-electronic.com>
parents:
diff changeset
165 }

Repositories maintained by Peter Meerwald, pmeerw@pmeerw.net.