2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
14 #include "dl/api/omxtypes.h"
15 #include "dl/sp/api/mipsSP.h"
17 OMXResult mips_FFTInv_CCSToR_F32_complex(const OMX_F32* pSrc,
19 const MIPSFFTSpec_R_FC32* pFFTSpec) {
20 OMX_U32 num_transforms, step;
21 /* Quarter, half and three-quarters of transform size. */
22 OMX_U32 n1_4, n1_2, n3_4;
23 const OMX_F32* w_re_ptr;
24 const OMX_F32* w_im_ptr;
25 OMX_U32 fft_size = 1 << pFFTSpec->order;
26 OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf;
27 const OMX_FC32* p_src = (const OMX_FC32*)pSrc;
29 OMX_U16* p_bitrev = pFFTSpec->pBitRevInv;
30 OMX_F32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, factor;
33 w_re_ptr = pFFTSpec->pTwiddle + 1;
34 w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - 1;
37 * Preliminary loop performing input adaptation due to computing real FFT
38 * through complex FFT of half the size.
40 for (uint32_t n = 1; n < fft_size / 8; ++n) {
43 tmp3 = p_src[fft_size / 2 - n].Re;
44 tmp4 = p_src[fft_size / 2 - n].Im;
54 p_buf[p_bitrev[n]].Re = 0.5f * (tmp5 - w_re * tmp7 - w_im * tmp6);
55 p_buf[p_bitrev[n]].Im = 0.5f * (tmp8 + w_re * tmp6 - w_im * tmp7);
56 p_buf[p_bitrev[fft_size / 2 - n]].Re =
57 0.5f * (tmp5 + w_re * tmp7 + w_im * tmp6);
58 p_buf[p_bitrev[fft_size / 2 - n]].Im =
59 0.5f * (-tmp8 + w_re * tmp6 - w_im * tmp7);
61 tmp1 = p_src[n + fft_size / 4].Re;
62 tmp2 = p_src[n + fft_size / 4].Im;
63 tmp3 = p_src[fft_size / 4 - n].Re;
64 tmp4 = p_src[fft_size / 4 - n].Im;
71 p_buf[p_bitrev[n + fft_size / 4]].Re =
72 0.5f * (tmp5 + w_im * tmp7 - w_re * tmp6);
73 p_buf[p_bitrev[n + fft_size / 4]].Im =
74 0.5f * (tmp8 - w_im * tmp6 - w_re * tmp7);
75 p_buf[p_bitrev[fft_size / 4 - n]].Re =
76 0.5f * (tmp5 - w_im * tmp7 + w_re * tmp6);
77 p_buf[p_bitrev[fft_size / 4 - n]].Im =
78 0.5f * (-tmp8 - w_im * tmp6 - w_re * tmp7);
83 tmp1 = p_src[fft_size / 8].Re;
84 tmp2 = p_src[fft_size / 8].Im;
85 tmp3 = p_src[3 * fft_size / 8].Re;
86 tmp4 = p_src[3 * fft_size / 8].Im;
96 p_buf[p_bitrev[fft_size / 8]].Re = 0.5f * (tmp5 - w_re * tmp7 - w_im * tmp6);
97 p_buf[p_bitrev[fft_size / 8]].Im = 0.5f * (tmp8 + w_re * tmp6 - w_im * tmp7);
98 p_buf[p_bitrev[3 * fft_size / 8]].Re =
99 0.5f * (tmp5 + w_re * tmp7 + w_im * tmp6);
100 p_buf[p_bitrev[3 * fft_size / 8]].Im =
101 0.5f * (-tmp8 + w_re * tmp6 - w_im * tmp7);
103 p_buf[p_bitrev[0]].Re = 0.5f * (p_src[0].Re + p_src[fft_size / 2].Re);
104 p_buf[p_bitrev[0]].Im = 0.5f * (p_src[0].Re - p_src[fft_size / 2].Re);
105 p_buf[p_bitrev[fft_size / 4]].Re = p_src[fft_size / 4].Re;
106 p_buf[p_bitrev[fft_size / 4]].Im = -p_src[fft_size / 4].Im;
109 * Loop performing sub-transforms of size 4,
110 * which contain 2 butterfly operations.
112 num_transforms = (SUBTRANSFORM_CONST >> (17 - pFFTSpec->order)) | 1;
113 for (uint32_t n = 0; n < num_transforms; ++n) {
114 OMX_U32 offset = pFFTSpec->pOffset[n] << 2;
115 OMX_FC32* p_tmp = p_buf + offset;
117 tmp1 = p_tmp[0].Re + p_tmp[1].Re;
118 tmp2 = p_tmp[0].Im + p_tmp[1].Im;
119 tmp3 = p_tmp[0].Re - p_tmp[1].Re;
120 tmp4 = p_tmp[0].Im - p_tmp[1].Im;
121 tmp5 = p_tmp[2].Re + p_tmp[3].Re;
122 tmp6 = p_tmp[2].Im + p_tmp[3].Im;
123 tmp8 = p_tmp[2].Im - p_tmp[3].Im;
124 tmp7 = p_tmp[2].Re - p_tmp[3].Re;
126 p_tmp[0].Re = tmp1 + tmp5;
127 p_tmp[2].Re = tmp1 - tmp5;
128 p_tmp[0].Im = tmp2 + tmp6;
129 p_tmp[2].Im = tmp2 - tmp6;
130 p_tmp[1].Re = tmp3 - tmp8;
131 p_tmp[3].Re = tmp3 + tmp8;
132 p_tmp[1].Im = tmp4 + tmp7;
133 p_tmp[3].Im = tmp4 - tmp7;
136 num_transforms = (num_transforms >> 1) | 1;
138 * Loop performing sub-transforms of size 8,
139 * which contain four butterfly operations.
141 for (uint32_t n = 0; n < num_transforms; ++n) {
142 OMX_U32 offset = pFFTSpec->pOffset[n] << 3;
143 OMX_FC32* p_tmp = p_buf + offset;
145 tmp1 = p_tmp[4].Re + p_tmp[5].Re;
146 tmp3 = p_tmp[6].Re + p_tmp[7].Re;
147 tmp2 = p_tmp[4].Im + p_tmp[5].Im;
148 tmp4 = p_tmp[6].Im + p_tmp[7].Im;
155 tmp1 = p_tmp[4].Re - p_tmp[5].Re;
156 tmp2 = p_tmp[4].Im - p_tmp[5].Im;
157 tmp3 = p_tmp[6].Re - p_tmp[7].Re;
158 tmp4 = p_tmp[6].Im - p_tmp[7].Im;
160 p_tmp[4].Re = p_tmp[0].Re - tmp5;
161 p_tmp[0].Re = p_tmp[0].Re + tmp5;
162 p_tmp[4].Im = p_tmp[0].Im - tmp6;
163 p_tmp[0].Im = p_tmp[0].Im + tmp6;
164 p_tmp[6].Re = p_tmp[2].Re + tmp8;
165 p_tmp[2].Re = p_tmp[2].Re - tmp8;
166 p_tmp[6].Im = p_tmp[2].Im - tmp7;
167 p_tmp[2].Im = p_tmp[2].Im + tmp7;
169 tmp5 = SQRT1_2 * (tmp1 - tmp2);
170 tmp7 = SQRT1_2 * (tmp3 + tmp4);
171 tmp6 = SQRT1_2 * (tmp1 + tmp2);
172 tmp8 = SQRT1_2 * (tmp4 - tmp3);
179 p_tmp[5].Re = p_tmp[1].Re - tmp1;
180 p_tmp[1].Re = p_tmp[1].Re + tmp1;
181 p_tmp[5].Im = p_tmp[1].Im - tmp2;
182 p_tmp[1].Im = p_tmp[1].Im + tmp2;
183 p_tmp[7].Re = p_tmp[3].Re + tmp4;
184 p_tmp[3].Re = p_tmp[3].Re - tmp4;
185 p_tmp[7].Im = p_tmp[3].Im - tmp3;
186 p_tmp[3].Im = p_tmp[3].Im + tmp3;
189 step = 1 << (pFFTSpec->order - 4);
191 /* Outer loop that loops over FFT stages. */
192 for (uint32_t fft_stage = 4; fft_stage <= pFFTSpec->order - 2; ++fft_stage) {
195 num_transforms = (num_transforms >> 1) | 1;
196 for (uint32_t n = 0; n < num_transforms; ++n) {
197 OMX_U32 offset = pFFTSpec->pOffset[n] << fft_stage;
198 OMX_FC32* p_tmp = p_buf + offset;
200 tmp1 = p_tmp[n1_2].Re + p_tmp[n3_4].Re;
201 tmp2 = p_tmp[n1_2].Re - p_tmp[n3_4].Re;
202 tmp3 = p_tmp[n1_2].Im + p_tmp[n3_4].Im;
203 tmp4 = p_tmp[n1_2].Im - p_tmp[n3_4].Im;
205 p_tmp[n1_2].Re = p_tmp[0].Re - tmp1;
206 p_tmp[0].Re = p_tmp[0].Re + tmp1;
207 p_tmp[n1_2].Im = p_tmp[0].Im - tmp3;
208 p_tmp[0].Im = p_tmp[0].Im + tmp3;
209 p_tmp[n3_4].Re = p_tmp[n1_4].Re + tmp4;
210 p_tmp[n1_4].Re = p_tmp[n1_4].Re - tmp4;
211 p_tmp[n3_4].Im = p_tmp[n1_4].Im - tmp2;
212 p_tmp[n1_4].Im = p_tmp[n1_4].Im + tmp2;
214 w_re_ptr = pFFTSpec->pTwiddle + step;
216 pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - step;
219 * Loop performing split-radix butterfly operations for one sub-transform.
221 for (uint32_t i = 1; i < n1_4; ++i) {
225 tmp1 = w_re * p_tmp[n1_2 + i].Re - w_im * p_tmp[n1_2 + i].Im;
226 tmp2 = w_re * p_tmp[n1_2 + i].Im + w_im * p_tmp[n1_2 + i].Re;
227 tmp3 = w_re * p_tmp[n3_4 + i].Re + w_im * p_tmp[n3_4 + i].Im;
228 tmp4 = w_re * p_tmp[n3_4 + i].Im - w_im * p_tmp[n3_4 + i].Re;
235 p_tmp[n1_2 + i].Re = p_tmp[i].Re - tmp5;
236 p_tmp[i].Re = p_tmp[i].Re + tmp5;
237 p_tmp[n1_2 + i].Im = p_tmp[i].Im - tmp6;
238 p_tmp[i].Im = p_tmp[i].Im + tmp6;
239 p_tmp[n3_4 + i].Re = p_tmp[n1_4 + i].Re + tmp2;
240 p_tmp[n1_4 + i].Re = p_tmp[n1_4 + i].Re - tmp2;
241 p_tmp[n3_4 + i].Im = p_tmp[n1_4 + i].Im - tmp1;
242 p_tmp[n1_4 + i].Im = p_tmp[n1_4 + i].Im + tmp1;
252 /* Last FFT stage, write data to output buffer. */
255 factor = (OMX_F32)2.0f / fft_size;
257 p_dst = (OMX_FC32*)pDst;
259 tmp1 = p_buf[n1_2].Re + p_buf[n3_4].Re;
260 tmp2 = p_buf[n1_2].Re - p_buf[n3_4].Re;
261 tmp3 = p_buf[n1_2].Im + p_buf[n3_4].Im;
262 tmp4 = p_buf[n1_2].Im - p_buf[n3_4].Im;
264 p_dst[n1_2].Re = factor * (p_buf[0].Re - tmp1);
265 p_dst[0].Re = factor * (p_buf[0].Re + tmp1);
266 p_dst[n1_2].Im = factor * (p_buf[0].Im - tmp3);
267 p_dst[0].Im = factor * (p_buf[0].Im + tmp3);
268 p_dst[n3_4].Re = factor * (p_buf[n1_4].Re + tmp4);
269 p_dst[n1_4].Re = factor * (p_buf[n1_4].Re - tmp4);
270 p_dst[n3_4].Im = factor * (p_buf[n1_4].Im - tmp2);
271 p_dst[n1_4].Im = factor * (p_buf[n1_4].Im + tmp2);
273 w_re_ptr = pFFTSpec->pTwiddle + step;
274 w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - step;
276 for (uint32_t i = 1; i < n1_4; ++i) {
280 tmp1 = w_re * p_buf[n1_2 + i].Re - w_im * p_buf[n1_2 + i].Im;
281 tmp2 = w_re * p_buf[n1_2 + i].Im + w_im * p_buf[n1_2 + i].Re;
282 tmp3 = w_re * p_buf[n3_4 + i].Re + w_im * p_buf[n3_4 + i].Im;
283 tmp4 = w_re * p_buf[n3_4 + i].Im - w_im * p_buf[n3_4 + i].Re;
290 p_dst[n1_2 + i].Re = factor * (p_buf[i].Re - tmp5);
291 p_dst[i].Re = factor * (p_buf[i].Re + tmp5);
292 p_dst[n1_2 + i].Im = factor * (p_buf[i].Im - tmp6);
293 p_dst[i].Im = factor * (p_buf[i].Im + tmp6);
294 p_dst[n3_4 + i].Re = factor * (p_buf[n1_4 + i].Re + tmp2);
295 p_dst[n1_4 + i].Re = factor * (p_buf[n1_4 + i].Re - tmp2);
296 p_dst[n3_4 + i].Im = factor * (p_buf[n1_4 + i].Im - tmp1);
297 p_dst[n1_4 + i].Im = factor * (p_buf[n1_4 + i].Im + tmp1);
303 return OMX_Sts_NoErr;