2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
14 #include "dl/api/omxtypes.h"
15 #include "dl/sp/api/mipsSP.h"
17 OMXResult mips_FFTFwd_RToCCS_F32_real(const OMX_F32* pSrc,
19 const MIPSFFTSpec_R_FC32* pFFTSpec) {
20 OMX_U32 num_transforms;
21 OMX_FC32* p_dst = (OMX_FC32*)pDst;
22 OMX_FC32* p_buf = (OMX_FC32*)pFFTSpec->pBuf;
23 OMX_F32 tmp1, tmp2, tmp3, tmp4;
24 const OMX_F32* w_re_ptr;
25 const OMX_F32* w_im_ptr;
27 /* Transform for order = 2. */
28 /* TODO: hard-code the offsets for pSrc. */
29 if (pFFTSpec->order == 2) {
30 OMX_U16* p_bitrev = pFFTSpec->pBitRev;
32 tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
33 tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
34 tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
35 tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
37 p_dst[0].Re = tmp1 + tmp2;
38 p_dst[2].Re = tmp1 - tmp2;
48 * Loop performing sub-transforms of size 4, which contain two butterfly
49 * operations. Reading the input signal from split-radix bitreverse offsets.
51 num_transforms = (SUBTRANSFORM_CONST >> (16 - pFFTSpec->order)) | 1;
52 for (uint32_t n = 0; n < num_transforms; ++n) {
53 OMX_U32 offset = pFFTSpec->pOffset[n] << 2;
54 OMX_FC32* p_tmp = p_buf + offset;
55 OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
57 tmp1 = pSrc[p_bitrev[0]] + pSrc[p_bitrev[1]];
58 tmp2 = pSrc[p_bitrev[2]] + pSrc[p_bitrev[3]];
59 tmp3 = pSrc[p_bitrev[0]] - pSrc[p_bitrev[1]];
60 tmp4 = pSrc[p_bitrev[2]] - pSrc[p_bitrev[3]];
62 p_tmp[0].Re = tmp1 + tmp2;
63 p_tmp[2].Re = tmp1 - tmp2;
73 * Loop performing sub-transforms of size 8,
74 * which contain four butterfly operations.
77 if (!num_transforms) {
79 * Means the FFT size is equal to 8, so this is the last stage. Place the
80 * output to the destination buffer and avoid unnecessary computations.
82 OMX_FC32* p_tmp = p_buf;
83 OMX_U16* p_bitrev = pFFTSpec->pBitRev;
86 tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
87 tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
91 tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
92 tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
93 tmp5 = SQRT1_2 * (tmp1 + tmp2);
94 tmp1 = SQRT1_2 * (tmp1 - tmp2);
96 p_dst[4].Re = p_tmp[0].Re - tmp3;
97 p_dst[0].Re = p_tmp[0].Re + tmp3;
98 p_dst[0].Im = p_tmp[0].Im;
99 p_dst[4].Im = p_tmp[0].Im;
100 p_dst[2].Re = p_tmp[2].Re;
101 p_dst[2].Im = p_tmp[2].Im - tmp4;
102 p_dst[1].Re = p_tmp[1].Re + tmp5;
103 p_dst[1].Im = p_tmp[1].Im - tmp1;
104 p_dst[3].Re = p_tmp[3].Re - tmp5;
105 p_dst[3].Im = p_tmp[3].Im - tmp1;
107 return OMX_Sts_NoErr;
112 for (uint32_t n = 0; n < num_transforms; ++n) {
113 OMX_U32 offset = pFFTSpec->pOffset[n] << 3;
114 OMX_FC32* p_tmp = p_buf + offset;
115 OMX_U16* p_bitrev = pFFTSpec->pBitRev + offset;
118 tmp1 = pSrc[p_bitrev[4]] + pSrc[p_bitrev[5]];
119 tmp2 = pSrc[p_bitrev[6]] + pSrc[p_bitrev[7]];
123 tmp1 = pSrc[p_bitrev[4]] - pSrc[p_bitrev[5]];
124 tmp2 = pSrc[p_bitrev[6]] - pSrc[p_bitrev[7]];
125 tmp5 = SQRT1_2 * (tmp1 + tmp2);
126 tmp1 = SQRT1_2 * (tmp1 - tmp2);
128 p_tmp[4].Re = p_tmp[0].Re - tmp3;
129 p_tmp[0].Re = p_tmp[0].Re + tmp3;
130 p_tmp[4].Im = p_tmp[0].Im;
131 p_tmp[6].Re = p_tmp[2].Re;
132 p_tmp[6].Im = p_tmp[2].Im + tmp4;
133 p_tmp[2].Im = p_tmp[2].Im - tmp4;
135 p_tmp[5].Re = p_tmp[1].Re - tmp5;
136 p_tmp[1].Re = p_tmp[1].Re + tmp5;
137 p_tmp[5].Im = p_tmp[1].Im + tmp1;
138 p_tmp[1].Im = p_tmp[1].Im - tmp1;
139 p_tmp[7].Re = p_tmp[3].Re + tmp5;
140 p_tmp[3].Re = p_tmp[3].Re - tmp5;
141 p_tmp[7].Im = p_tmp[3].Im + tmp1;
142 p_tmp[3].Im = p_tmp[3].Im - tmp1;
146 * Last FFT stage, performing sub-transforms of size 16. Place the output
147 * into the destination buffer and avoid unnecessary computations.
149 tmp1 = p_buf[8].Re + p_buf[12].Re;
150 tmp2 = p_buf[8].Re - p_buf[12].Re;
151 tmp3 = p_buf[8].Im + p_buf[12].Im;
152 tmp4 = p_buf[8].Im - p_buf[12].Im;
154 p_dst[8].Re = p_buf[0].Re - tmp1;
155 p_dst[0].Re = p_buf[0].Re + tmp1;
156 p_dst[8].Im = p_buf[0].Im - tmp3;
157 p_dst[0].Im = p_buf[0].Im + tmp3;
158 p_dst[4].Re = p_buf[4].Re + tmp4;
159 p_dst[4].Im = p_buf[4].Im - tmp2;
161 w_re_ptr = pFFTSpec->pTwiddle + 1;
162 w_im_ptr = pFFTSpec->pTwiddle + (OMX_U32)(1 << pFFTSpec->order - 2) - 1;
164 /* Loop performing split-radix butterfly operations. */
165 for (uint32_t n = 1; n < 4; ++n) {
167 OMX_F32 w_re = *w_re_ptr;
168 OMX_F32 w_im = *w_im_ptr;
170 tmp1 = w_re * p_buf[8 + n].Re + w_im * p_buf[8 + n].Im;
171 tmp2 = w_re * p_buf[8 + n].Im - w_im * p_buf[8 + n].Re;
172 tmp3 = w_re * p_buf[12 + n].Re - w_im * p_buf[12 + n].Im;
173 tmp4 = w_re * p_buf[12 + n].Im + w_im * p_buf[12 + n].Re;
180 p_dst[n].Re = p_buf[n].Re + tmp5;
181 p_dst[n].Im = p_buf[n].Im + tmp6;
182 p_dst[4 + n].Re = p_buf[4 + n].Re + tmp2;
183 p_dst[4 + n].Im = p_buf[4 + n].Im - tmp1;
188 return OMX_Sts_NoErr;