2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
15 @// File Name: armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7767
18 @// Last Modified Date: Thu, 27 Sep 2007
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
25 @// Compute the first-stage Radix-4 pass of an N-point complex FFT
30 @// Include standard headers
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
35 @// Import symbols required from other files
36 @// (For example tables)
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
46 @// Guarding implementation by the processor name
50 @// Guarding implementation by the processor name
@// Register alias: pPingPongBuf is r5. The FFTSTAGE macro copies it into pDst
@// after the set loop has finished.
58 #define pPingPongBuf r5
66 @//Local Scratch Registers
69 @// Reuse grpSize as setCount
@// Register alias: outPointStep is r4. It is the post-increment applied to pDst
@// by the first three VST2 stores of each output group in FFTSTAGE.
72 #define outPointStep r4
@// FFTSTAGE scaled, inverse, name
@//   Macro body for the first radix-4 stage; it is expanded once for each of
@//   the four entry points that follow it in this file.
@//   scaled  : compared with "TRUE" by .ifeqs to select a code variant
@//   inverse : compared with "TRUE" by .ifeqs to select a code variant
@//   name    : suffix appended to the grpZeroSetLoop label so that every
@//             expansion gets its own label
@//   Data is read through pSrc with VLD2 and written through pDst with VST2.
@//   Every access carries the :128 alignment qualifier, so the address must be
@//   16-byte aligned at each access.
@//   On exit pSrc = pDst - pointStep and pDst = pPingPongBuf.
@//   NOTE(review): no .else/.endif/.endm directives and no butterfly arithmetic
@//   are visible in this listing; the comments below describe only the
@//   instructions that are shown.
117 .MACRO FFTSTAGE scaled, inverse, name
119 @// Define stack arguments
121 @// pT0+1 increments pT0 by 8 bytes
122 @// pT0+pointStep = increment of 8*pointStep bytes = 2*grpSize bytes
123 @// Note: outPointStep = pointStep for firststage
125 MOV pointStep,subFFTNum,LSL #1 @// pointStep = 2*subFFTNum
128 @// Update pSubFFTSize and pSubFFTNum regs
@// Prologue loads for the first pass of the loop: data[0..3] in order, each
@// advancing pSrc by pointStep except the last, which advances by setStep.
129 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
130 MOV subFFTSize,#4 @// subFFTSize = 4
132 @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
133 LSR grpSize,subFFTNum,#2
134 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
135 MOV subFFTNum,grpSize @// subFFTNum = old subFFTNum/4
138 @// Calculate the step of input data for the next set
139 @//MOV setStep,pointStep,LSL #1
140 MOV setStep,grpSize,LSL #4 @// setStep = 16*grpSize
141 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
142 ADD setStep,setStep,pointStep @// setStep = 3*pointStep
143 RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
145 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
146 MOV step1,pointStep,LSL #1 @// step1 = 2*pointStep
148 .ifeqs "\scaled", "TRUE"
154 RSB step3,pointStep,#0 @// step3 = -pointStep
156 @// grp = 0 a special case since all the twiddle factors are 1
157 @// Loop on the sets : 2 sets at a time
@// Inside the loop the loads run in the order data[0], data[2], data[1],
@// data[3] with post-increments step1, step3, step1, setStep, i.e.
@// +2*pointStep, -pointStep, +2*pointStep, 16-3*pointStep: a net pSrc advance
@// of 16 bytes per pass.
159 grpZeroSetLoop\name :
163 @// Decrement setcount
164 SUBS setCount,setCount,#2 @// decrement the set loop counter
@// Copy of the set body selected when scaled is "TRUE"; the add shown in this
@// copy is VHADD (halving add).
166 .ifeqs "\scaled", "TRUE"
168 @// finish first stage of 4 point FFT
172 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
174 VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
178 @// finish second stage of 4 point FFT
@// Output group selected when inverse is "TRUE": stores go out in the order
@// Z0, Z3, Z1, Z2. The last store post-increments pDst by setStep, so with
@// outPointStep = pointStep the net pDst advance per pass is 16 bytes.
180 .ifeqs "\inverse", "TRUE"
182 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
185 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
188 VST2 {dZr0,dZi0},[pDst :128],outPointStep
192 VST2 {dZr3,dZi3},[pDst :128],outPointStep
195 VST2 {dZr1,dZi1},[pDst :128],outPointStep
198 VHADD qY0,qX0,qX2 @// u0 for next iteration
199 VST2 {dZr2,dZi2},[pDst :128],setStep
@// Second output group: stores go out in the order Z0, Z2, Z1, Z3.
@// NOTE(review): presumably the forward branch of the inverse test -- confirm.
204 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
207 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
210 VST2 {dZr0,dZi0},[pDst :128],outPointStep
214 VST2 {dZr2,dZi2},[pDst :128],outPointStep
217 VST2 {dZr1,dZi1},[pDst :128],outPointStep
220 VHADD qY0,qX0,qX2 @// u0 for next iteration
221 VST2 {dZr3,dZi3},[pDst :128],setStep
@// Second copy of the set body: same load/store pattern as the copy above, but
@// the add shown is VADD instead of VHADD.
@// NOTE(review): presumably the unscaled branch of the scaled test -- confirm.
229 @// finish first stage of 4 point FFT
234 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
236 VLD2 {dXr2,dXi2},[pSrc :128],step3 @// data[2]
240 @// finish second stage of 4 point FFT
@// Output group selected when inverse is "TRUE": store order Z0, Z3, Z1, Z2.
242 .ifeqs "\inverse", "TRUE"
244 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
247 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
250 VST2 {dZr0,dZi0},[pDst :128],outPointStep
254 VST2 {dZr3,dZi3},[pDst :128],outPointStep
257 VST2 {dZr1,dZi1},[pDst :128],outPointStep
260 VADD qY0,qX0,qX2 @// u0 for next iteration
261 VST2 {dZr2,dZi2},[pDst :128],setStep
@// Second output group: store order Z0, Z2, Z1, Z3.
@// NOTE(review): presumably the forward branch of the inverse test -- confirm.
266 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
269 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
272 VST2 {dZr0,dZi0},[pDst :128],outPointStep
276 VST2 {dZr2,dZi2},[pDst :128],outPointStep
279 VST2 {dZr1,dZi1},[pDst :128],outPointStep
282 VADD qY0,qX0,qX2 @// u0 for next iteration
283 VST2 {dZr3,dZi3},[pDst :128],setStep
@// Repeat while the result is greater than zero. The only flag-setting
@// instruction visible in the loop is the SUBS on setCount at its top.
289 BGT grpZeroSetLoop\name
291 @// reset pSrc to pDst for the next stage
292 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
293 MOV pDst,pPingPongBuf @// pDst = pPingPongBuf
@// Entry points. Each M_START line is given a function name and r4, and the
@// FFTSTAGE line after it expands the macro with one (scaled, inverse)
@// combination plus a label suffix that is unique to that expansion.
@// NOTE(review): M_START is not defined in this listing; presumably it comes
@// from armCOMM_s.h and r4 names the highest register it preserves -- confirm.
@// No closing directive for these functions is visible in this listing.
@//
@// Forward transform: scaled = "FALSE", inverse = "FALSE", label suffix fwd
300 M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
301 FFTSTAGE "FALSE","FALSE",fwd
@// Inverse transform: scaled = "FALSE", inverse = "TRUE", label suffix inv
306 M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
307 FFTSTAGE "FALSE","TRUE",inv
@// Forward transform, Sfs variant: scaled = "TRUE", inverse = "FALSE", label suffix fwdsfs
311 M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
312 FFTSTAGE "TRUE","FALSE",fwdsfs
@// Inverse transform, Sfs variant: scaled = "TRUE", inverse = "TRUE", label suffix invsfs
316 M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
317 FFTSTAGE "TRUE","TRUE",invsfs