2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
10 @// This file was originally licensed as follows. It has been
11 @// relicensed with permission from the copyright holders.
15 @// File Name: armSP_FFT_CToC_SC16_Radix4_ls_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision: 7765
18 @// Last Modified Date: Thu, 27 Sep 2007
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
25 @// Compute a radix-4 FFT stage for an N-point complex signal
30 @// Include standard headers
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
36 @// Import symbols required from other files
37 @// (For example tables)
42 @// Set debugging level
43 @//DEBUG_ON SETL {TRUE}
46 @// Guarding implementation by the processor name
53 @// Guarding implementation by the processor name
56 @// Import symbols required from other files
57 @// (For example tables)
58 @//IMPORT armAAC_constTable
73 @//Local Scratch Registers
@// Register aliases used by the FFTSTAGE macro below.
@// outPointStep is set to subFFTSize << 2 in FFTSTAGE and is used there as the
@// post-increment (in bytes) applied to pDst after each VST2.
75 #define outPointStep r3
@// Input butterfly data, viewed as signed 16-bit lanes. D0-D3 are the targets
@// of the first VLD4 in FFTSTAGE and D4-D7 the targets of the second.
86 #define dButterfly1Real02 D0.S16
87 #define dButterfly1Imag02 D1.S16
88 #define dButterfly1Real13 D2.S16
89 #define dButterfly1Imag13 D3.S16
90 #define dButterfly2Real02 D4.S16
91 #define dButterfly2Imag02 D5.S16
92 #define dButterfly2Real13 D6.S16
93 #define dButterfly2Imag13 D7.S16
@// 32-bit lane views of D8-D13, named after the w^1, w^2 and w^3 twiddles.
103 #define dW1rS32 D8.S32
104 #define dW1iS32 D9.S32
105 #define dW2rS32 D10.S32
106 #define dW2iS32 D11.S32
107 #define dW3rS32 D12.S32
108 #define dW3iS32 D13.S32
@// Temporaries. dTmp0 and dTmp1 are 16-bit views of D12 and D13, and dTmp1S32
@// is a 32-bit view of D13, so all three share storage with dW3rS32/dW3iS32
@// above. Anything loaded through a dTmp name into D12/D13 is lost as soon as
@// the w^3 twiddles are loaded. dTmp2S32 and dTmp3S32 map to D14 and D15.
117 #define dTmp0 D12.S16
118 #define dTmp1 D13.S16
119 #define dTmp1S32 D13.S32
120 #define dTmp2S32 D14.S32
121 #define dTmp3S32 D15.S32
@// FFTSTAGE scaled, inverse, name
@//   scaled  : tested against "TRUE"; the listing shows halving VHADD/VHSUB
@//             output butterflies under that test, followed by an otherwise
@//             identical VADD/VSUB copy.
@//   inverse : tested against "TRUE"; selects the sign of the imaginary
@//             twiddle products (VMLAL and VMLSL swapped) and the order in
@//             which the four results are stored (Z0,Z3,Z2,Z1 instead of
@//             Z0,Z1,Z2,Z3).
@//   name    : not referenced by any line visible in this listing.
@// NOTE(review): the embedded line numbers are not contiguous, so directives
@// such as .else/.endif/.endm, the loop label and branch, and a number of
@// arithmetic instructions are not shown here. Confirm against the full file.
161 .macro FFTSTAGE scaled, inverse , name
163 @// Define stack arguments
169 @// pOut0+1 increments pOut0 by 4 bytes (LSL #2 below; previously stated as 8 - TODO confirm)
170 @// pOut0+outPointStep == increment of 4*outPointStep bytes
171 MOV outPointStep,subFFTSize,LSL #2
173 MOV subFFTNum,#1 @//after the last stage
@// grpCount = 4*subFFTSize; it is decremented by 16 per pass further down,
@// which matches the four groups handled in each pass.
174 LSL grpCount,subFFTSize,#2
177 @// Update grpCount and grpSize right away
179 @// update subFFTSize for the next stage
180 MOV subFFTSize,grpCount
@// dstStep is the post-increment for the fourth store of a pass: it undoes the
@// three outPointStep advances and moves pDst 16 bytes past where the pass
@// started (one VST2 of two D registers writes 16 bytes).
181 MOV dstStep,outPointStep,LSL #1
183 ADD dstStep,dstStep,outPointStep @// dstStep = 3*outPointStep
184 RSB dstStep,dstStep,#16 @// dstStep = - 3*outPointStep+16
186 @// Process 4 groups at a time
@// Two de-interleaving loads with write-back fill D0-D3 and then D4-D7 from
@// pSrc; the VUZP instructions below regroup them per butterfly input.
189 VLD4 {dButterfly1Real02,dButterfly1Imag02,dButterfly1Real13,dButterfly1Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
190 VLD4 {dButterfly2Real02,dButterfly2Imag02,dButterfly2Real13,dButterfly2Imag13},[pSrc :256]! @// AC.r AC.i BD.r BD.i
192 @// Load the second twiddle for 4 groups : w^2
193 @// w^2 twiddle (2i+0,2i+2,2i+4,2i+6) for group 0,1,2,3
@// dTmp0/dTmp1 are D12/D13, which the VLD3 loads from pw3 below overwrite.
194 VLD4 {dW2r,dW2i,dTmp0,dTmp1},[pw2 :256]!
196 VUZP dButterfly1Real13, dButterfly2Real13 @// B.r D.r
198 @// Load the third twiddle for 4 groups : w^3
199 @// w^3 twiddle (3i+0,3i+3,3i+6,3i+9) for group 0,1,2,3
@// First VLD3 writes D12,D13,D14 (dW3rS32 is D12).
200 VLD3 {dW3rS32,dTmp1S32,dTmp2S32},[pw3 :64]!
202 VUZP dButterfly1Imag13, dButterfly2Imag13 @// B.i D.i
203 VUZP dButterfly1Real02, dButterfly2Real02 @// A.r C.r
@// Second VLD3 writes D13,D14,D15 (dW3iS32 is D13), replacing the D13/D14
@// contents left by the first VLD3; D12 keeps the value loaded as dW3rS32.
205 VLD3 {dW3iS32,dTmp2S32,dTmp3S32},[pw3 :64]!
207 VUZP dButterfly1Imag02, dButterfly2Imag02 @// A.i C.i
209 VLD2 {dW1r,dW1i}, [pw1 :128]!
211 @// Rearrange the third twiddle
@// SUBS updates the flags; presumably a loop branch not shown in this listing
@// consumes them - confirm against the full file.
213 SUBS grpCount,grpCount,#16 @// grpCount is multiplied by 4
@// w^1 product: under the inverse test the real accumulator gains Xi*Wi and
@// the imaginary one loses Xr*Wi; the second pair of lines has the signs
@// reversed.
215 .ifeqs "\inverse", "TRUE"
217 VMLAL qT0,dXi1,dW1i @// real part
219 VMLSL qT1,dXr1,dW1i @// imag part
223 VMLSL qT0,dXi1,dW1i @// real part
225 VMLAL qT1,dXr1,dW1i @// imag part
229 @// Load the first twiddle for 4 groups : w^1
230 @// w^1 twiddle (i+0,i+1,i+2,i+3) for group 0,1,2,3
@// w^2 product, same sign pattern as the w^1 product above.
232 .ifeqs "\inverse", "TRUE"
234 VMLAL qT2,dXi2,dW2i @// real part
236 VMLSL qT3,dXr2,dW2i @// imag part
240 VMLSL qT2,dXi2,dW2i @// real part
242 VMLAL qT3,dXr2,dW2i @// imag part
@// w^3 product, same sign pattern; it reuses the qT0/qT1 accumulators.
251 .ifeqs "\inverse", "TRUE"
253 VMLAL qT0,dXi3,dW3i @// real part
255 VMLSL qT1,dXr3,dW3i @// imag part
259 VMLSL qT0,dXi3,dW3i @// real part
261 VMLAL qT1,dXr3,dW3i @// imag part
@// Output butterflies. The lines under the scaled test use the halving forms
@// VHADD/VHSUB; the copy that follows uses VADD/VSUB.
271 .ifeqs "\scaled", "TRUE"
273 @// finish first stage of 4 point FFT
281 @// finish second stage of 4 point FFT
@// The visible Z1/Z3 arithmetic is the same for both directions; what differs
@// is the store order. Inverse writes Z0,Z3,Z2,Z1, forward writes Z0,Z1,Z2,Z3.
287 .ifeqs "\inverse", "TRUE"
289 VHADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
290 VST2 {dZr0,dZi0},[pDst :128],outPointStep
293 VHSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
295 VST2 {dZr3,dZi3},[pDst :128],outPointStep
296 VST2 {dZr2,dZi2},[pDst :128],outPointStep
297 VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
301 VHSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
304 VHADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
305 VST2 {dZr0,dZi0},[pDst :128],outPointStep
307 VST2 {dZr1,dZi1},[pDst :128],outPointStep
308 VST2 {dZr2,dZi2},[pDst :128],outPointStep
309 VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
@// Copy of the block above using VADD/VSUB in place of VHADD/VHSUB.
315 @// finish first stage of 4 point FFT
323 @// finish second stage of 4 point FFT
329 .ifeqs "\inverse", "TRUE"
331 VADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
332 VST2 {dZr0,dZi0},[pDst :128],outPointStep
335 VSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
337 VST2 {dZr3,dZi3},[pDst :128],outPointStep
338 VST2 {dZr2,dZi2},[pDst :128],outPointStep
339 VST2 {dZr1,dZi1},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
343 VSUB dZr1,dYr0,dYi3 @// y1 = u0+ju3
346 VADD dZr3,dYr0,dYi3 @// y3 = u0-ju3
347 VST2 {dZr0,dZi0},[pDst :128],outPointStep
349 VST2 {dZr1,dZi1},[pDst :128],outPointStep
350 VST2 {dZr2,dZi2},[pDst :128],outPointStep
351 VST2 {dZr3,dZi3},[pDst :128],dstStep @// dstStep = -3*outPointStep + 16
363 @// Reset and Swap pSrc and pDst for the next stage
@// pDst = pSrc - 4*outPointStep, then pSrc = pTmp - outPointStep. pTmp is not
@// written by any line visible in this listing - confirm where it is set.
365 SUB pDst,pSrc,outPointStep,LSL #2 @// pDst -= size; pSrc -= 4*size bytes
366 SUB pSrc,pTmp,outPointStep
@// Four entry points, each expanding FFTSTAGE with a different
@// (scaled, inverse) pair; the third argument is the macro's name parameter.
@// NOTE(review): M_START comes from the included armCOMM_s.h; its r4 argument
@// is presumably the highest core register it preserves - confirm there. No
@// matching M_END lines are visible in this listing.
@// scaled="FALSE", inverse="FALSE"
371 M_START armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe,r4
372 FFTSTAGE "FALSE","FALSE",FWD
@// scaled="FALSE", inverse="TRUE"
376 M_START armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe,r4
377 FFTSTAGE "FALSE","TRUE",INV
@// scaled="TRUE", inverse="FALSE"
381 M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
382 FFTSTAGE "TRUE","FALSE",FWDSFS
@// scaled="TRUE", inverse="TRUE"
386 M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe,r4
387 FFTSTAGE "TRUE","TRUE",INVSFS