2 @// Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 @// Use of this source code is governed by a BSD-style license
5 @// that can be found in the LICENSE file in the root of the source
6 @// tree. An additional intellectual property rights grant can be found
7 @// in the file PATENTS. All contributing project authors may
8 @// be found in the AUTHORS file in the root of the source tree.
10 @// This is a modification of
11 @// armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe_s.s to support float
17 @// Compute the "preTwiddleRadix2" stage prior to the call to the complexFFT
18 @// It does a Z(k) = Feven(k) + jW^(-k) FOdd(k); k=0,1,2,...N/2-1 computation
23 @// Include standard headers
25 #include "dl/api/arm/armCOMM_s.h"
26 #include "dl/api/arm/omxtypes_s.h"
29 @// Import symbols required from other files
30 @// (For example tables)
33 @// Set debugging level
34 @//DEBUG_ON SETL {TRUE}
38 @// Guarding implementation by the processor name
42 @// Guarding implementation by the processor name
57 @//Local Scratch Registers
70 @// Total num of radix stages required to complete the FFT
@// Register aliases. These are C preprocessor text substitutions, so keep
@// comments on separate lines: a trailing comment on a #define line would
@// become part of the expansion.
@// Core-register alias.
74 #define diffMinusOne r2
@// Twiddle-table read pointers: FFTSTAGE derives both from pTwiddle and
@// post-increments them in its VLD1 loads.
82 #define pTwiddleTmp r11
83 #define argTwiddle1 r12
@// NEON D-register aliases carrying an explicit .F32 datatype.
118 #define dW0Tmp D10.F32
119 #define dW1Neg D11.F32
123 @ Structure offsets for the FFTSpec
@// Byte offsets, 4 apart, used as [pFFTSpec, #offset] in the FFTSTAGE loads.
124 .set ARMsFFTSpec_N, 0 @// FFT size; FFTSTAGE loads it into N
125 .set ARMsFFTSpec_pBitRev, 4 @// presumably the bit-reversal table pointer (by name) - TODO confirm
126 .set ARMsFFTSpec_pTwiddle, 8 @// twiddle table pointer; FFTSTAGE loads it into pTwiddle
127 .set ARMsFFTSpec_pBuf, 12 @// buffer pointer; FFTSTAGE loads it into pOut
129 .macro FFTSTAGE scaled, inverse, name
@// FFTSTAGE: body of the preTwiddleRadix2 stage.
@// Reads N, pTwiddle and pOut from the FFTSpec structure addressed by
@// pFFTSpec, and writes its results through pOut1.
@//   name            - suffix appended to the local labels
@//                     (evenOddButterflyLoop, decrementScale) so that each
@//                     expansion of the macro gets its own label names.
@//   scaled, inverse - NOTE(review): usage not confirmed in the lines
@//                     reviewed; verify before relying on them.
131 @// Read the FFT size N from the structure
132 LDR N, [pFFTSpec, #ARMsFFTSpec_N]
134 @// Read other structure parameters
135 LDR pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
136 LDR pOut, [pFFTSpec, #ARMsFFTSpec_pBuf] @// pOut comes from the pBuf field
141 MOV size,N,ASR #1 @// size = N/2; preserve the contents of N
142 MOV step,N,LSL #2 @// step = N/2 * 8 bytes
145 @// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
146 @// Note: W^(k) is stored as negated value and also need to
147 @// conjugate the values from the table
149 @// Z(0) : no need of twiddle multiply
150 @// Z(0) = 1/2 { [F(0) + F'(N/2)] +j [F(0) - F'(N/2)] }
153 ADD pOut1,pOut,step @// pOut1 = pOut+ N/2*8 bytes
156 @// twStep = 3N/8 * 8 bytes pointing to W^1
157 SUB twStep,step,size,LSL #1 @// twStep = step - 2*size
159 MOV step1,size,LSL #2 @// step1 = N/4 * 8 = N/2*4 bytes
160 SUB step1,step1,#8 @// (N/4-1)*8 bytes
162 VADD dY0,dX0,dX1 @// [b+d | a+c]
163 VSUB dY1,dX0,dX1 @// [b-d | a-c]
164 VMUL dY0, dY0, half[0] @// 1/2 factor of Z(0); NOTE(review): assumes half[0] holds 0.5 - confirm
165 VMUL dY1, dY1, half[0] @// same scaling for the difference term
167 @// dY0= [a-c | a+c] ;dY1= [b-d | b+d]
177 ADD pTwiddleTmp,pTwiddle,#8 @// W^2
179 ADD argTwiddle1,pTwiddle,twStep @// W^1
182 BLT decrementScale\name @// signed less-than: bypass the butterfly loop
186 @// Z(k) = 1/2 {[F(k) + F'(N/2-k)] +j*W^(-k) [F(k) - F'(N/2-k)]}
187 @// Note: W^k is stored as negative values in the table and also
188 @// need to conjugate the values from the table.
190 @// Process 4 elements at a time. E.g: Z(1),Z(2) and Z(N/2-2),Z(N/2-1)
191 @// since both of them require F(1),F(2) and F(N/2-2),F(N/2-1)
195 evenOddButterflyLoop\name :
198 VLD1 dW0r,[argTwiddle1],step1 @// load, then advance the pointer by step1
199 VLD1 dW1r,[argTwiddle1]! @// load, then advance by the bytes just read
201 VLD2 {dX0r,dX0i},[pSrc],step @// de-interleaving load, then advance pSrc by step
202 SUB argTwiddle1,argTwiddle1,step1 @// take back the step1 added by the dW0r load
203 VLD2 {dX1r,dX1i},[pSrc]! @// de-interleaving load, advance by the bytes just read
205 SUB step1,step1,#8 @// (N/4-2)*8 bytes
206 VLD1 dW0i,[pTwiddleTmp],step1 @// load, then advance the pointer by step1
207 VLD1 dW1i,[pTwiddleTmp]! @// load, then advance by the bytes just read
210 SUB pTwiddleTmp,pTwiddleTmp,step1 @// step pTwiddleTmp back by step1
216 VSUB dT2,dX0r,dX1r @// a-c
217 VADD dT3,dX0i,dX1i @// b+d
218 VADD dT0,dX0r,dX1r @// a+c
219 VSUB dT1,dX0i,dX1i @// b-d
222 VMUL dT2, dT2, half[0] @// all four sums/differences are scaled by half[0],
223 VMUL dT3, dT3, half[0] @// i.e. the 1/2 in Z(k) covers both bracketed terms
225 VMUL dT0, dT0, half[0]
226 VMUL dT1, dT1, half[0]
244 VADD dY1r,dT0,dX1i @// F(N/2 -1)
251 VADD dY0r,dT0,dX0i @// F(1)
255 VST2 {dY0r,dY0i},[pOut1],step @// interleaving store, then advance pOut1 by step
256 VST2 {dY1r,dY1i},[pOut1]! @// interleaving store, advance by the bytes just written
258 SUB step,step,#32 @// (N/2-4)*8 bytes
261 BGT evenOddButterflyLoop\name
264 @// set both the ptrs to the last element
268 @// Last element can be expanded as follows
269 @// 1/2{[Z(k) + Z'(k)] - j w^-k [Z(k) - Z'(k)]} (since W^k is stored as a negated value)
271 @// 1/2{[(a+jb) + (a-jb)] - j w^-k [(a+jb) - (a-jb)]}
272 @// 1/2{[2a+j0] - j (c-jd) [0+j2b]}
274 @// Since (c,d) = (0,1) for the last element, result is just (a,-b)
279 VST1 dX0r[0],[pOut1]! @// store lane 0 of dX0r, advance pOut1 by the bytes just written
285 decrementScale\name :
289 M_START armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe,r4
291 FFTSTAGE "FALSE","TRUE",Inv