2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 // Use of this source code is governed by a BSD-style license
5 // that can be found in the LICENSE file in the root of the source
6 // tree. An additional intellectual property rights grant can be found
7 // in the file PATENTS. All contributing project authors may
8 // be found in the AUTHORS file in the root of the source tree.
11 // This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
12 // to support float instead of SC32.
17 // Compute the first radix-4 stage of an FFT for an N-point complex signal
22 // Include standard headers
24 #include "dl/api/arm/arm64COMM_s.h"
25 #include "dl/api/arm/omxtypes_s.h"
27 // Import symbols required from other files
28 // (For example tables)
33 // Set debugging level
34 //DEBUG_ON SETL {TRUE}
38 // Guarding implementation by the processor name
42 // Guarding implementation by the processor name
// pSubFFTSize names x4. In FFTSTAGE it is used as an address: subFFTSize is
// loaded from it on entry and stored back to it on exit.
// NOTE(review): under AAPCS64 x4 is the fifth integer/pointer argument
// register, so this is presumably the fifth argument of the routine - confirm
// against the caller.
50 #define pSubFFTSize x4
56 //Local Scratch Registers
61 // Reuse grpSize as setCount
// NOTE(review): the setCount definition that the comment above refers to is
// not visible in this listing - confirm it names the same register as grpSize.
// outPointStep names x8. In FFTSTAGE it is the post-index increment applied
// to pDst after each st2 store. A comment inside FFTSTAGE states that
// outPointStep equals pointStep for the first stage, so pointStep presumably
// names x8 as well - its definition is not visible here, confirm.
64 #define outPointStep x8
// FFTSTAGE scaled, inverse, name
//
// Body of the first-stage radix-4 butterfly pass. Input is read through pSrc
// with ld2 (de-interleaving loads) and results are written through pDst with
// st2 (interleaving stores).
//
// Macro arguments:
//   scaled  - not referenced anywhere in the visible macro body.
//   inverse - compared with the string "TRUE" by .ifeqs to select the
//             inverse-transform variant of the second butterfly stage.
//   name    - suffix appended to every label in the body so that each
//             expansion of the macro gets its own distinct labels.
//
// Memory side effects visible here: the values at [pSubFFTNum] and
// [pSubFFTSize] are loaded on entry and stored back on exit. subFFTNum is
// written back as the entry value shifted right by two (divided by 4).
//
// The loop is software pipelined: loads for the next pass are interleaved
// with the arithmetic and the stores of the current pass, and the flags set
// by SUBS at the top of the loop are consumed by branches much further down.
// Instruction order therefore matters - do not reorder casually.
//
// NOTE(review): several things this body depends on are not visible in this
// listing and must be confirmed against the complete file:
//   - the register aliases (pSrc, pDst, pSubFFTNum, subFFTNum, subFFTSize,
//     grpSize, setCount, pointStep, setStep, step1, step3 and the dX.., dY..,
//     dZ.. vector registers),
//   - the instruction that writes step3,
//   - the instructions that compute dZr2, dZi2, dZr3 and dZi3,
//   - the .else and .endif that pair with .ifeqs, and the closing .endm.
97 .macro FFTSTAGE scaled, inverse, name
99 // Define stack arguments
101 // Move args values into our work registers
// pSubFFTNum and pSubFFTSize are used as addresses; fetch the values stored
// at them.
102 ldr subFFTNum, [pSubFFTNum]
103 ldr subFFTSize, [pSubFFTSize]
105 // pT0+1 increments pT0 by 8 bytes
106 // pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
107 // Note: outPointStep = pointStep for firststage
// pointStep = 2 * subFFTNum. It is used below as the post-index increment
// between data[0], data[1], data[2] and data[3].
109 lsl pointStep, subFFTNum, #1
111 // Update pSubFFTSize and pSubFFTNum regs
// Prologue loads for the first pass. Each ld2 splits consecutive element
// pairs into the two listed registers (presumably real parts into dXr.. and
// imaginary parts into dXi.. - confirm against the register definitions) and
// then advances pSrc by the post-index register.
112 ld2 {dXr0,dXi0}, [pSrc], pointStep // data[0]
114 // subFFTSize = 1 for the first stage
// NOTE(review): no instruction in this listing writes subFFTSize between the
// load above and the store at the end of the macro, so as shown the value is
// written back unchanged - confirm against the complete file.
117 // Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
118 LSR grpSize,subFFTNum,#2
119 ld2 {dXr1,dXi1}, [pSrc], pointStep // data[1]
// subFFTNum now holds the quarter-size value that is stored back to
// [pSubFFTNum] at the end of the macro.
120 MOV subFFTNum,grpSize
123 // Calculate the step of input data for the next set
124 //MOV setStep,pointStep,LSL #1
// setStep = 16 * grpSize, which equals 2 * pointStep whenever subFFTNum is a
// multiple of 4.
125 lsl setStep, grpSize, #4
126 ld2 {dXr2,dXi2}, [pSrc], pointStep // data[2]
128 // setStep = 3*pointStep
129 ADD setStep,setStep,pointStep
130 // setStep = - 3*pointStep+16
// rsb is not a native AArch64 mnemonic; presumably it is a helper macro from
// one of the included headers that computes 16 minus setStep, matching the
// comment above - confirm.
132 rsb setStep,setStep,#16
133 // data[3] & update pSrc for the next set
// Post-incrementing by setStep rewinds pSrc by 3*pointStep and advances it by
// 16 bytes, which leaves it on data[0] of the next pass.
134 ld2 {dXr3,dXi3}, [pSrc], setStep
136 // step1 = 2*pointStep
137 lsl step1, pointStep, #1
139 // fadd qY0, qX0, qX2
// First butterfly sum for the first pass, computed before entering the loop:
// Y0 = X0 + X2. Inside the loop the same sum is recomputed at the bottom for
// the following pass.
140 fadd dYr0, dXr0, dXr2
141 fadd dYi0, dXi0, dXi2
142 // step3 = -pointStep
// NOTE(review): the instruction that sets step3 is not visible in this
// listing, although step3 is used as a post-index register inside the loop.
145 // grp = 0 a special case since all the twiddle factors are 1
146 // Loop on the sets : 2 sets at a time
148 radix4fsGrpZeroSetLoop\name :
152 // Decrement setcount
// The condition flags set here are consumed by the BEQ and BGT further down.
// The vector arithmetic, ld2 and st2 instructions in between do not modify
// the condition flags.
153 SUBS setCount,setCount,#2
156 // finish first stage of 4 point FFT
// Y2 = X0 - X2. X0 and X2 are then free to be overwritten by the loads for
// the next pass.
160 fsub dYr2, dXr0, dXr2
161 fsub dYi2, dXi0, dXi2
// The next-pass loads walk pSrc in the order data[0], data[2], data[1],
// data[3]. Here pSrc is on data[0]; advancing by step1 (2*pointStep) lands
// on data[2].
163 ld2 {dXr0,dXi0}, [pSrc], step1 // data[0]
// Y1 = X1 + X3
165 fadd dYr1, dXr1, dXr3
166 fadd dYi1, dXi1, dXi3
// Per the comment before the loop step3 is -pointStep, which moves pSrc back
// from data[2] to data[1].
167 ld2 {dXr2,dXi2}, [pSrc], step3 // data[2]
// Y3 = X1 - X3
169 fsub dYr3, dXr1, dXr3
170 fsub dYi3, dXi1, dXi3
173 // finish second stage of 4 point FFT
// Conditional assembly: the lines that follow are assembled only when the
// inverse macro argument is the string TRUE.
175 .ifeqs "\inverse", "TRUE"
// Advancing by step1 moves pSrc from data[1] to data[3].
177 ld2 {dXr1,dXi1}, [pSrc], step1 // data[1]
// Z0 = Y0 + Y1
179 fadd dZr0, dYr0, dYr1
180 fadd dZi0, dYi0, dYi1
182 // data[3] & update pSrc for the next set, but not if it's the
183 // last iteration so that we don't read past the end of the
// data addressed by pSrc.
// BEQ is taken when the SUBS at the top of the loop brought setCount to zero.
185 BEQ radix4SkipLastUpdateInv\name
186 ld2 {dXr3,dXi3}, [pSrc], setStep
188 radix4SkipLastUpdateInv\name:
// Inverse variant: results are stored in the order Z0, Z3, Z1, Z2. The first
// three stores advance pDst by outPointStep each.
191 st2 {dZr0,dZi0},[pDst],outPointStep
// Z1 = Y0 - Y1
195 FSUB dZr1, dYr0, dYr1
196 FSUB dZi1, dYi0, dYi1
// NOTE(review): the instructions that compute dZr3, dZi3, dZr2 and dZi2 are
// not visible in this listing.
197 st2 {dZr3,dZi3},[pDst],outPointStep
200 st2 {dZr1,dZi1},[pDst],outPointStep
203 // fadd qY0, qX0, qX2
204 FADD dYr0, dXr0, dXr2 // u0 for next iteration
205 FADD dYi0, dXi0, dXi2
// The last store post-increments pDst by setStep, i.e. it rewinds by three
// output steps and advances 16 bytes to the output slot of the next pass.
206 st2 {dZr2,dZi2},[pDst],setStep
// NOTE(review): from here on the code mirrors the block above but uses the
// Fwd labels and a different store order, so it appears to be the
// forward-transform variant. The conditional-assembly directive separating it
// from the inverse variant is not visible in this listing - confirm.
// Advancing by step1 moves pSrc from data[1] to data[3].
211 ld2 {dXr1,dXi1}, [pSrc], step1 // data[1]
// Z0 = Y0 + Y1
213 fadd dZr0, dYr0, dYr1
214 fadd dZi0, dYi0, dYi1
216 // data[3] & update pSrc for the next set, but not if it's the
217 // last iteration so that we don't read past the end of the
// data addressed by pSrc.
// BEQ is taken when the SUBS at the top of the loop brought setCount to zero.
219 BEQ radix4SkipLastUpdateFwd\name
220 ld2 {dXr3,dXi3}, [pSrc], setStep
222 radix4SkipLastUpdateFwd\name:
// Forward variant: results are stored in the order Z0, Z2, Z1, Z3.
225 st2 {dZr0,dZi0},[pDst],outPointStep
// Z1 = Y0 - Y1
229 fsub dZr1, dYr0, dYr1
230 fsub dZi1, dYi0, dYi1
// NOTE(review): the instructions that compute dZr2, dZi2, dZr3 and dZi3 are
// not visible in this listing.
231 st2 {dZr2,dZi2},[pDst],outPointStep
234 st2 {dZr1,dZi1},[pDst],outPointStep
// Y0 for the next pass, from the X0 and X2 values loaded earlier in this
// iteration.
238 fadd dYr0, dXr0, dXr2 // u0 for next iteration
239 fadd dYi0, dXi0, dXi2
// As in the inverse variant, the last store rewinds pDst by three output
// steps and advances 16 bytes.
241 st2 {dZr3,dZi3},[pDst],setStep
// Loop while setCount is still greater than zero (signed), using the flags
// from the SUBS at the top of the loop.
245 BGT radix4fsGrpZeroSetLoop\name
247 // Save subFFTNum and subFFTSize for next stage
248 str subFFTNum, [pSubFFTNum]
249 str subFFTSize, [pSubFFTSize]
// Forward-transform entry point. M_START opens the routine named
// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace and its body is FFTSTAGE
// expanded with scaled = "FALSE", inverse = "FALSE" and label suffix fwd.
// Because inverse is not "TRUE", the .ifeqs test inside FFTSTAGE fails for
// this expansion.
// NOTE(review): M_START is defined outside this listing (presumably in
// dl/api/arm/arm64COMM_s.h). The meaning of its third argument d15 and the
// matching end-of-routine macro are not visible here - confirm.
255 M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace,,d15
256 FFTSTAGE "FALSE","FALSE",fwd
// Inverse-transform entry point. M_START opens the routine named
// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace and its body is FFTSTAGE
// expanded with scaled = "FALSE", inverse = "TRUE" and label suffix inv.
// Because inverse is "TRUE", the .ifeqs test inside FFTSTAGE succeeds for
// this expansion.
// NOTE(review): M_START is defined outside this listing (presumably in
// dl/api/arm/arm64COMM_s.h). The meaning of its third argument d15 and the
// matching end-of-routine macro are not visible here - confirm.
261 M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace,,d15
262 FFTSTAGE "FALSE","TRUE",inv