2 @ Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @ Use of this source code is governed by a BSD-style license
5 @ that can be found in the LICENSE file in the root of the source
6 @ tree. An additional intellectual property rights grant can be found
7 @ in the file PATENTS. All contributing project authors may
8 @ be found in the AUTHORS file in the root of the source tree.
10 @ Some code in this file was originally from file
11 @ omxSP_FFTInv_CToC_SC16_Sfs_s.S which was licensed as follows.
12 @ It has been relicensed with permission from the copyright holders.
16 @ File Name: omxSP_FFTInv_CToC_SC16_Sfs_s.s
18 @ Last Modified Revision: 6729
19 @ Last Modified Date: Tue, 17 Jul 2007
21 @ (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
26 @ Compute an inverse FFT for a 16-bit real signal, with complex FFT routines.
29 #include "dl/api/arm/armCOMM_s.h"
30 #include "dl/api/arm/omxtypes_s.h"
32 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
33 .extern armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
34 .extern armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
35 .extern armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
36 .extern armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
37 .extern armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
38 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
39 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
40 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
41 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
42 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
43 .extern armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
44 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
45 .extern armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
46 .extern armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
47 .extern armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
58 @Local Scratch Registers
70 @ Total number of radix stages to complete the FFT
@ Preprocessor aliases that give symbolic names to the registers used below.
@ Each name expands textually to the register written on its right-hand side.
@ Aliases bound to core registers (r2, r11, r12):
74 #define diffMinusOne r2
81 #define pTwiddleTmp r11
82 #define argTwiddle1 r12
@ Aliases bound to D registers, each carrying an .S32 data-type suffix.
@ NOTE(review): the names suggest a zero constant and twiddle-factor
@ temporaries, but no visible line uses them - confirm against the full file.
114 #define dzero D20.S32
119 #define dW0Tmp D10.S32
120 #define dW1Neg D11.S32
124 @ Allocate stack memory required by the function
125 M_ALLOC4 diffOnStack, 4
127 @ Write function header
128 M_START omxSP_FFTInv_CCSToR_S16_Sfs,r11,d15
130 @ Structure offsets for the FFTSpec
131 .set ARMsFFTSpec_N, 0
132 .set ARMsFFTSpec_pBitRev, 4
133 .set ARMsFFTSpec_pTwiddle, 8
134 .set ARMsFFTSpec_pBuf, 12
136 @ Define stack arguments
138 @ Read the size from structure and take log
139 LDR N, [pFFTSpec, #ARMsFFTSpec_N]
141 @ Read other structure parameters
142 LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
143 LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
145 @ Call the preTwiddle Radix2 stage before doing the complex IFFT
147 @ The following conditional BL combination would work since
148 @ evenOddButterflyLoop in the first call would set Z flag to zero
151 BLEQ armSP_FFTInv_CCSToR_S16_preTwiddleRadix2_unsafe
152 BLGT armSP_FFTInv_CCSToR_S16_Sfs_preTwiddleRadix2_unsafe
156 ASR N,N,#1 @ N/2 point complex IFFT
157 ADD pSrc,pOut,N,LSL #2 @ set pSrc as pOut1
159 CLZ order,N @ N = 2^order
163 ADD scale,scale,order @ FFTInverse has a final scaling factor by N
166 BGT orderGreaterthan3 @ order > 3
169 BGE orderGreaterthan0 @ order > 0
170 M_STR scale, diffOnStack,LT @ order = 0
177 @ set the buffers appropriately for various orders
181 MOVEQ pOut,pDst @ Pass the first stage destination in RN5
182 MOV argTwiddle,pTwiddle
183 @ Store the scale factor and scale at the end
185 M_STR diff, diffOnStack
186 BGE orderGreaterthan1
187 BLLT armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe @ order = 1
192 MOV tmpOrder,order @ tmpOrder = RN 4
193 BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
195 BLGT armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
196 BL armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
203 @ check scale = 0 or scale = order
204 SUB diff, scale, order @ scale > order
206 TST order, #2 @ Set input args to fft stages
209 MOVEQ pOut,pDst @ Pass the first stage destination in RN5
210 MOV argTwiddle,pTwiddle
213 M_STR diff, diffOnStack
216 @check for even or odd order
217 @ NOTE: The following combination of BLs works correctly even though the first
218 @ BL corrupts the flags. This is because the end of the "grpZeroSetLoop" loop inside
219 @ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
221 TST order,#0x00000001
222 BLEQ armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
223 BLNE armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
229 BEQ lastStageUnscaledRadix4
230 BL armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
234 lastStageUnscaledRadix4:
235 BL armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
239 @check for even or odd order
240 @ NOTE: The following combination of BLs works correctly even though the first
241 @ BL corrupts the flags. This is because the end of the "grpZeroSetLoop" loop inside
242 @ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
244 TST order,#0x00000001
245 BLEQ armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
246 BLNE armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
252 BEQ lastStageScaledRadix4
253 BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
257 lastStageScaledRadix4:
258 BL armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
260 FFTEnd: @ Does only the scaling
262 M_LDR diff, diffOnStack
266 RSB diff,diff,#0 @ to use VRSHL for right shift by a variable
269 @ Use parallel loads for bigger FFT size.
274 VLD1 {qT0s, qT1s},[pSrc:256] @ pSrc contains pDst pointer
275 SUBS subFFTSize,subFFTSize,#8
278 VST1 {qT0s, qT1s},[pSrc:256]!
282 scaleLessFFTData: @ N = subFFTSize ; dataptr = pDst ; scale = diff
283 VLD1 {dX0[0]},[pSrc] @ pSrc contains pDst pointer
284 SUBS subFFTSize,subFFTSize,#1
286 VST1 {dX0[0]},[pSrc]!
291 MOV result, #OMX_Sts_NoErr
293 @ Write function tail