2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
15 @// File Name: armSP_FFT_CToC_SC32_Radix4_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7767
18 @// Last Modified Date: Thu, 27 Sep 2007
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
25 @// Compute a radix-4 FFT stage for an N-point complex signal
31 @// Include standard headers
33 #include "dl/api/arm/armCOMM_s.h"
34 #include "dl/api/arm/omxtypes_s.h"
37 @// Import symbols required from other files
38 @// (For example tables)
43 @// Set debugging level
44 @//DEBUG_ON SETL {TRUE}
48 @// Guarding implementation by the processor name
53 @// Guarding implementation by the processor name
56 @// Import symbols required from other files
57 @// (For example tables)
73 @//Local Scratch Registers
@// Preprocessor aliases that give core registers symbolic names for use in
@// the FFTSTAGE macro. Keep explanatory text on separate lines: a trailing
@// comment on a #define line would become part of the expansion.
@// outPointStep (r5): post-increment stride applied to pDst by the VST2 stores.
77 #define outPointStep r5
@// stepTwiddle (r12): post-increment stride applied to pTwiddle by the VLD1
@// twiddle loads; it is accumulated with pointStep inside FFTSTAGE.
78 #define stepTwiddle r12
@// FFTSTAGE scaled, inverse, name
@//
@// Body shared by the four radix-4 stage entry points of this file.
@//   scaled  - compared with "TRUE" by .ifeqs to pick the code path used for
@//             the two butterfly stages.
@//   inverse - compared with "TRUE" by .ifeqs to pick the sign pattern of the
@//             twiddle multiplies and the order in which results are stored.
@//   name    - tag supplied by each instantiation (FWD, INV, FWDSFS, INVSFS).
@//             NOTE(review): presumably used to keep labels unique per
@//             instantiation - confirm.
@// Register names used below (pSrc, pDst, pTwiddle, subFFTNum, subFFTSize,
@// grpCount, pointStep, setStep, srcStep, dstStep, twStep, setCount, t1 and
@// the dX/dZ/dW/qT NEON names) are aliases defined outside this macro.
133 .macro FFTSTAGE scaled, inverse , name
135 @// Define stack arguments
138 @// Update grpCount and grpSize right away in order to reuse the pGrpCount and pGrpSize regs
140 LSL grpCount,subFFTSize,#2 @// grpCount = 4*subFFTSize
141 LSR subFFTNum,subFFTNum,#2 @// subFFTNum = subFFTNum/4
142 MOV subFFTSize,grpCount @// subFFTSize takes the updated (4x) value
144 VLD1 dW1,[pTwiddle] @//[wi | wr]
145 @// pT0+1 increments pT0 by 8 bytes
146 @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
147 MOV pointStep,subFFTNum,LSL #1 @// pointStep = 2*subFFTNum
150 @// pOut0+1 increments pOut0 by 8 bytes
151 @// pOut0+outPointStep == increment of 8*outPointStep bytes = 2*size bytes
@// dW2 (and dW3 below) are loaded from the same, unmodified pTwiddle address
@// as dW1: none of these three loads uses writeback.
154 VLD1 dW2,[pTwiddle] @//[wi | wr]
@// SMULBB multiplies only the signed bottom halfwords of its operands, so
@// this product is exact only while grpCount and pointStep fit in 16 bits.
155 SMULBB outPointStep,grpCount,pointStep
156 LSL pointStep,pointStep,#2 @// 2*grpSize
158 VLD1 dW3,[pTwiddle] @//[wi | wr]
159 MOV srcStep,pointStep,LSL #1 @// srcStep = 2*pointStep
160 ADD setStep,srcStep,pointStep @// setStep = 3*pointStep
161 @//RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
162 RSB setStep,setStep,#0 @// setStep = - 3*pointStep
163 SUB srcStep,srcStep,#16 @// srcStep = 2*pointStep-16
165 MOV dstStep,outPointStep,LSL #1 @// dstStep = 2*outPointStep
166 ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
167 RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
@// Load the four inputs of the first butterfly. Each VLD2 de-interleaves the
@// loaded pairs into the two listed registers and post-increments pSrc by the
@// given register. The scalar instructions between the loads prepare the
@// twiddle strides.
173 VLD2 {dXr0,dXi0},[pSrc],pointStep @// data[0]
174 ADD stepTwiddle,stepTwiddle,pointStep @// stepTwiddle += pointStep
175 VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1]
176 ADD pTwiddle,pTwiddle,stepTwiddle @// set pTwiddle to the first point
177 VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2]
178 MOV twStep,stepTwiddle,LSL #2 @// twStep = 4*stepTwiddle (temporary)
180 VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc for the next set
181 SUB twStep,stepTwiddle,twStep @// twStep = -3*stepTwiddle
183 MOV setCount,pointStep,LSR #3 @// setCount = pointStep/8
184 ADD pSrc,pSrc,#16 @// set pSrc to data[0] of the next set
185 ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
194 SUBS setCount,setCount,#2 @// decrement the loop counter (by 2) and set flags
@// Twiddle multiply of data[1] by W1 using widening (long) multiplies.
@// inverse TRUE:  qT0 = Xr1*W1[0] + Xi1*W1[1],  qT1 = Xi1*W1[0] - Xr1*W1[1]
196 .ifeqs "\inverse", "TRUE"
197 VMULL qT0,dXr1,dW1[0]
198 VMLAL qT0,dXi1,dW1[1] @// real part
199 VMULL qT1,dXi1,dW1[0]
200 VMLSL qT1,dXr1,dW1[1] @// imag part
@// Opposite sign pattern:
@//                qT0 = Xr1*W1[0] - Xi1*W1[1],  qT1 = Xi1*W1[0] + Xr1*W1[1]
203 VMULL qT0,dXr1,dW1[0]
204 VMLSL qT0,dXi1,dW1[1] @// real part
205 VMULL qT1,dXi1,dW1[0]
206 VMLAL qT1,dXr1,dW1[1] @// imag part
@// dXr1/dXi1 have been consumed above, so they are reloaded here for the
@// following iteration while the remaining multiplies are issued.
210 VLD2 {dXr1,dXi1},[pSrc],pointStep @// data[1] for next iteration
@// Twiddle multiply of data[2] by W2, same two sign patterns as for data[1].
212 .ifeqs "\inverse", "TRUE"
213 VMULL qT2,dXr2,dW2[0]
214 VMLAL qT2,dXi2,dW2[1] @// real part
215 VMULL qT3,dXi2,dW2[0]
216 VMLSL qT3,dXr2,dW2[1] @// imag part
219 VMULL qT2,dXr2,dW2[0]
220 VMLSL qT2,dXi2,dW2[1] @// real part
221 VMULL qT3,dXi2,dW2[0]
222 VMLAL qT3,dXr2,dW2[1] @// imag part
228 VLD2 {dXr2,dXi2},[pSrc],pointStep @// data[2] for next iteration
@// Twiddle multiply of data[3] by W3. qT0/qT1 are written again here, so the
@// data[1] products must already have been consumed at this point.
231 .ifeqs "\inverse", "TRUE"
232 VMULL qT0,dXr3,dW3[0]
233 VMLAL qT0,dXi3,dW3[1] @// real part
234 VMULL qT1,dXi3,dW3[0]
235 VMLSL qT1,dXr3,dW3[1] @// imag part
238 VMULL qT0,dXr3,dW3[0]
239 VMLSL qT0,dXi3,dW3[1] @// real part
240 VMULL qT1,dXi3,dW3[0]
241 VMLAL qT1,dXr3,dW3[1] @// imag part
@// setStep = -3*pointStep, so this post-increment rewinds pSrc over the three
@// pointStep advances made by the data[1..3] loads.
251 VLD2 {dXr3,dXi3},[pSrc],setStep @// data[3] & update pSrc to data[0]
@// Butterfly and store sequence selected by the scaled argument.
253 .ifeqs "\scaled", "TRUE"
255 @// finish first stage of 4 point FFT
@// NOTE(review): this reload has no :128 alignment qualifier, while the other
@// data[0] reload further down in this macro does - confirm this is intended.
259 VLD2 {dXr0,dXi0},[pSrc]! @// data[0] for next iteration
263 @// finish second stage of 4 point FFT
@// Every store post-increments pDst: the first three by outPointStep, the
@// fourth by dstStep (= -3*outPointStep+16).
@// inverse TRUE: results are written in the order Z0, Z3, Z2, Z1.
268 .ifeqs "\inverse", "TRUE"
271 VST2 {dZr0,dZi0},[pDst :128],outPointStep
275 VST2 {dZr3,dZi3},[pDst :128],outPointStep
278 VST2 {dZr2,dZi2},[pDst :128],outPointStep
281 VST2 {dZr1,dZi1},[pDst :128],dstStep
@// Natural order: Z0, Z1, Z2, Z3.
287 VST2 {dZr0,dZi0},[pDst :128],outPointStep
291 VST2 {dZr1,dZi1},[pDst :128],outPointStep
294 VST2 {dZr2,dZi2},[pDst :128],outPointStep
297 VST2 {dZr3,dZi3},[pDst :128],dstStep
305 @// finish first stage of 4 point FFT
@// Here the data[0] reload asserts 128-bit alignment of pSrc.
309 VLD2 {dXr0,dXi0},[pSrc :128]! @// data[0] for next iteration
313 @// finish second stage of 4 point FFT
@// Same store pattern as above: Z0, Z3, Z2, Z1 when inverse is TRUE.
318 .ifeqs "\inverse", "TRUE"
321 VST2 {dZr0,dZi0},[pDst :128],outPointStep
325 VST2 {dZr3,dZi3},[pDst :128],outPointStep
328 VST2 {dZr2,dZi2},[pDst :128],outPointStep
331 VST2 {dZr1,dZi1},[pDst :128],dstStep
@// Natural order: Z0, Z1, Z2, Z3.
337 VST2 {dZr0,dZi0},[pDst :128],outPointStep
341 VST2 {dZr1,dZi1},[pDst :128],outPointStep
344 VST2 {dZr2,dZi2},[pDst :128],outPointStep
347 VST2 {dZr3,dZi3},[pDst :128],dstStep
354 ADD pSrc,pSrc,pointStep @// increment to data[1] of the next set
@// Fetch the three twiddles for the next group. The first two loads advance
@// pTwiddle by stepTwiddle each, the third applies twStep (= -3*stepTwiddle).
@// The scalar updates for the next group are interleaved with the loads.
358 VLD1 dW1,[pTwiddle :64],stepTwiddle @//[wi | wr]
359 SUBS grpCount,grpCount,#4 @// subtract 4 since grpCount multiplied by 4
360 VLD1 dW2,[pTwiddle :64],stepTwiddle @//[wi | wr]
361 ADD pSrc,pSrc,srcStep @// increment pSrc for the next grp
362 VLD1 dW3,[pTwiddle :64],twStep @//[wi | wr]
366 @// Reset and Swap pSrc and pDst for the next stage
@// pDst = pSrc - 4*outPointStep, then pSrc = t1 - outPointStep.
@// NOTE(review): t1 presumably holds the value of pDst saved just before
@// this point - confirm.
368 SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= 2*size; pSrc -= 8*size bytes
369 SUB pSrc,t1,outPointStep
@// Entry point that instantiates FFTSTAGE with scaled = "FALSE" and
@// inverse = "FALSE"; FWD is the name tag for this instantiation.
@// NOTE(review): r4 is presumably the highest core register that M_START
@// preserves on entry - confirm against armCOMM_s.h.
375 M_START armSP_FFTFwd_CToC_SC32_Radix4_OutOfPlace_unsafe,r4
376 FFTSTAGE "FALSE","FALSE",FWD
@// Entry point that instantiates FFTSTAGE with scaled = "FALSE" and
@// inverse = "TRUE"; INV is the name tag for this instantiation.
@// NOTE(review): r4 is presumably the highest core register that M_START
@// preserves on entry - confirm against armCOMM_s.h.
380 M_START armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe,r4
381 FFTSTAGE "FALSE","TRUE",INV
@// Entry point that instantiates FFTSTAGE with scaled = "TRUE" and
@// inverse = "FALSE"; FWDSFS is the name tag for this instantiation.
@// NOTE(review): r4 is presumably the highest core register that M_START
@// preserves on entry - confirm against armCOMM_s.h.
385 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4
386 FFTSTAGE "TRUE","FALSE",FWDSFS
@// Entry point that instantiates FFTSTAGE with scaled = "TRUE" and
@// inverse = "TRUE"; INVSFS is the name tag for this instantiation.
@// NOTE(review): r4 is presumably the highest core register that M_START
@// preserves on entry - confirm against armCOMM_s.h.
390 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe,r4
391 FFTSTAGE "TRUE","TRUE",INVSFS