2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
15 @// File Name: armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7761
18 @// Last Modified Date: Wed, 26 Sep 2007
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
25 @// Compute the first stage of a radix-4 FFT for an N-point complex signal
30 @// Include standard headers
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
35 @// Import symbols required from other files
36 @// (For example tables)
41 @// Set debugging level
42 @//DEBUG_ON SETL {TRUE}
46 @// Guarding implementation by the processor name
50 @// Guarding implementation by the processor name
@// pPingPongBuf aliases r5. FFTSTAGE copies it into pDst as its last step.
58 #define pPingPongBuf r5
66 @//Local Scratch Registers
69 @// Reuse grpSize as setCount
@// outPointStep aliases r4. FFTSTAGE uses it as the post-increment on the
@// first three VST2 stores of every butterfly.
@// NOTE(review): the remaining register aliases (pSrc, pDst, pointStep, ...)
@// are not visible in this excerpt; confirm which of them share r4.
72 #define outPointStep r4
@// FFTSTAGE scaled, inverse, name
@//   Macro body for the first radix-4 stage (group 0, all twiddles are 1).
@//   Arguments:
@//     scaled  - when equal to "TRUE" the butterflies use the halving
@//               VHADD/VHSUB forms; the VADD/VSUB forms are used otherwise.
@//     inverse - when equal to "TRUE" the visible stores go out in the order
@//               y0, y3, y2, y1; otherwise in the order y0, y1, y2, y3.
@//     name    - suffix pasted onto the grpZeroSetLoop label that the BGT at
@//               the end of the loop branches to.
@//   On exit pSrc = pDst - pointStep and pDst = pPingPongBuf.
@//   NOTE(review): several directives (.else/.endif/.endm and the loop label
@//   itself) are not visible in this excerpt; confirm against the full file.
116 .macro FFTSTAGE scaled, inverse, name
118 @// Define stack arguments
@// pointStep = subFFTNum, used below as the post-increment between loads
120 MOV pointStep,subFFTNum
121 @// Update pSubFFTSize and pSubFFTNum regs
124 VLD2 {dXr0,dXi0},[pSrc :128],pointStep @// data[0]
125 @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
126 LSR grpSize,subFFTNum,#2
@// subFFTNum is overwritten with the quartered value
127 MOV subFFTNum,grpSize
130 @// pT0+1 increments pT0 by 4 bytes
131 @// pT0+pointStep = increment of 4*pointStep bytes = grpSize bytes
132 @// Note: outPointStep = pointStep for firststage
133 VLD2 {dXr1,dXi1},[pSrc :128],pointStep @// data[1]
136 @// Calculate the step of input data for the next set
137 @//MOV setStep,pointStep,LSL #1
@// setStep = grpSize*8, i.e. 2*pointStep when subFFTNum is a multiple of 4
138 MOV setStep,grpSize,LSL #3
139 VLD2 {dXr2,dXi2},[pSrc :128],pointStep @// data[2]
141 ADD setStep,setStep,pointStep @// setStep = 3*pointStep
142 RSB setStep,setStep,#16 @// setStep = - 3*pointStep+16
145 MOV subFFTSize,#4 @// subFFTSize = 4 (radix-4 first stage)
@// u0 for the first iteration is computed ahead of the loop; later
@// iterations compute it at the end of the previous pass (see "next loop").
148 .ifeqs "\scaled", "TRUE"
149 VHADD qY0,qX0,qX2 @// u0
151 VADD qY0,qX0,qX2 @// u0
@// step3 = -pointStep
153 RSB step3,pointStep,#0
155 @// grp = 0 a special case since all the twiddle factors are 1
156 @// Loop on the sets: 4 sets at a time
@// data[3] is loaded with setStep as post-increment, which rewinds pSrc by
@// 3*pointStep and advances it by 16 bytes.
160 VLD2 {dXr3,dXi3},[pSrc :128],setStep @// data[3]
@// Scaled variant: same butterfly as the unscaled one further down, but
@// built from the halving VHADD/VHSUB instructions.
162 .ifeqs "\scaled", "TRUE"
164 @// finish first stage of 4 point FFT
166 VHSUB qY2,qX0,qX2 @// u1
167 SUBS setCount,setCount,#4 @// decrement the set loop counter
@// Loads for the next iteration are interleaved with the arithmetic.
169 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
170 VHADD qY1,qX1,qX3 @// u2
171 VLD2 {dXr2,dXi2},[pSrc :128],step3
172 VHSUB qY3,qX1,qX3 @// u3
176 @// finish second stage of 4 point FFT
178 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
179 VHADD qZ0,qY0,qY1 @// y0
@// inverse == "TRUE": stores go out as Z0, Z3, Z1, Z2
181 .ifeqs "\inverse", "TRUE"
183 VHSUB dZr3,dYr2,dYi3 @// y3
185 VST2 {dZr0,dZi0},[pDst :128],outPointStep
187 VHSUB qZ1,qY0,qY1 @// y2
188 VST2 {dZr3,dZi3},[pDst :128],outPointStep
190 VHADD dZr2,dYr2,dYi3 @// y1
191 VST2 {dZr1,dZi1},[pDst :128],outPointStep
194 VHADD qY0,qX0,qX2 @// u0 (next loop)
195 VST2 {dZr2,dZi2},[pDst :128],setStep
@// Remaining scaled stores go out as Z0, Z2, Z1, Z3.
@// NOTE(review): presumably the .else arm of the inverse test; the directive
@// is not visible here.
200 VHADD dZr2,dYr2,dYi3 @// y1
203 VST2 {dZr0,dZi0},[pDst :128],outPointStep
204 VHSUB qZ1,qY0,qY1 @// y2
206 VST2 {dZr2,dZi2},[pDst :128],outPointStep
207 VHSUB dZr3,dYr2,dYi3 @// y3
209 VST2 {dZr1,dZi1},[pDst :128],outPointStep
210 VHADD qY0,qX0,qX2 @// u0 (next loop)
211 VST2 {dZr3,dZi3},[pDst :128],setStep
@// Unscaled variant: identical schedule using VADD/VSUB.
218 @// finish first stage of 4 point FFT
220 VSUB qY2,qX0,qX2 @// u1
221 SUBS setCount,setCount,#4 @// decrement the set loop counter
223 VLD2 {dXr0,dXi0},[pSrc :128],step1 @// data[0]
224 VADD qY1,qX1,qX3 @// u2
225 VLD2 {dXr2,dXi2},[pSrc :128],step3
226 VSUB qY3,qX1,qX3 @// u3
230 @// finish second stage of 4 point FFT
232 VLD2 {dXr1,dXi1},[pSrc :128],step1 @// data[1]
233 VADD qZ0,qY0,qY1 @// y0
@// inverse == "TRUE": stores go out as Z0, Z3, Z1, Z2
235 .ifeqs "\inverse", "TRUE"
237 VSUB dZr3,dYr2,dYi3 @// y3
239 VST2 {dZr0,dZi0},[pDst :128],outPointStep
241 VSUB qZ1,qY0,qY1 @// y2
242 VST2 {dZr3,dZi3},[pDst :128],outPointStep
244 VADD dZr2,dYr2,dYi3 @// y1
245 VST2 {dZr1,dZi1},[pDst :128],outPointStep
248 VADD qY0,qX0,qX2 @// u0 (next loop)
249 VST2 {dZr2,dZi2},[pDst :128],setStep
@// Remaining unscaled stores go out as Z0, Z2, Z1, Z3.
254 VADD dZr2,dYr2,dYi3 @// y1
257 VST2 {dZr0,dZi0},[pDst :128],outPointStep
258 VSUB qZ1,qY0,qY1 @// y2
260 VST2 {dZr2,dZi2},[pDst :128],outPointStep
261 VSUB dZr3,dYr2,dYi3 @// y3
263 VST2 {dZr1,dZi1},[pDst :128],outPointStep
264 VADD qY0,qX0,qX2 @// u0 (next loop)
265 VST2 {dZr3,dZi3},[pDst :128],setStep
@// Loop while setCount > 0; the flags come from the SUBS setCount in the
@// loop body above.
272 BGT grpZeroSetLoop\name
275 @// reset pSrc to pDst for the next stage
276 SUB pSrc,pDst,pointStep @// pSrc = pDst - pointStep
@// pDst = pPingPongBuf
277 MOV pDst,pPingPongBuf
@// Entry points. Each M_START opens one routine and expands FFTSTAGE with a
@// different (scaled, inverse) pair and a unique loop-label suffix.
@// NOTE(review): M_START is not defined in this excerpt (presumably it comes
@// from armCOMM_s.h) and the meaning of its r4 argument should be confirmed
@// there. The matching M_END lines are not visible here either.
@// Forward transform, unscaled butterflies.
284 M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
285 FFTSTAGE "FALSE","FALSE",FWD
@// Inverse transform, unscaled butterflies.
290 M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
291 FFTSTAGE "FALSE","TRUE",INV
@// Forward transform, scaled (halving) butterflies.
295 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
296 FFTSTAGE "TRUE","FALSE",FWDSFS
@// Inverse transform, scaled (halving) butterflies.
300 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
301 FFTSTAGE "TRUE","TRUE",INVSFS