2 // Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
4 // Use of this source code is governed by a BSD-style license
5 // that can be found in the LICENSE file in the root of the source
6 // tree. An additional intellectual property rights grant can be found
7 // in the file PATENTS. All contributing project authors may
8 // be found in the AUTHORS file in the root of the source tree.
10 // This is a modification of armSP_FFT_CToC_SC32_Radix2_ls_unsafe_s.S
11 // to support float instead of SC32.
16 // Compute the last stage of a radix-2 DIT, in-order, out-of-place FFT
17 // for an N-point complex signal.
22 // Include standard headers
24 #include "dl/api/arm/arm64COMM_s.h"
25 #include "dl/api/arm/omxtypes_s.h"
28 // Import symbols required from other files
29 // (For example tables)
34 // Set debugging level
35 //DEBUG_ON SETL {TRUE}
38 // Guarding implementation by the processor name
// Register alias: x4 holds the address from which FFTSTAGE loads the current
// sub-FFT size (see "ldr subFFTSize, [pSubFFTSize]" in the macro).
47 #define pSubFFTSize x4
53 //Local Scratch Registers
// Register alias: x8 holds the stride that FFTSTAGE computes as
// subFFTSize << 3 and uses as the post-increment of its first st2 store.
58 #define outPointStep x8
// FFTSTAGE scaled, inverse, name
//
// Emits the loop for the last radix-2 stage. Per iteration it reads 16 bytes
// of twiddles (ld2) and 32 bytes of input (ld4), applies the twiddle with
// fmla/fmls, and writes two result vectors with st2, the first advancing pDst
// by outPointStep and the second by dstStep.
//
//   scaled  - not referenced by any line of the body visible here.
//   inverse - when the string equals "TRUE", the block following .ifeqs is
//             assembled (inverse-transform sign pattern).
//   name    - suffix appended to the loop label so that each expansion of the
//             macro gets its own label.
//
// NOTE(review): the instructions that initialise qT0/qT1 and that produce
// dYr0/dYi0/dYr1/dYi1 are not visible in this view; the comments below only
// describe the lines that are.
77 .macro FFTSTAGE scaled, inverse, name
79 // Move parameters into our work registers
// subFFTSize = value stored at the address held in pSubFFTSize.
80 ldr subFFTSize, [pSubFFTSize]
// outPointStep = subFFTSize * 8; used as the post-increment of the first st2.
82 lsl outPointStep, subFFTSize, #3
84 // Update subFFTNum and grpCount right away
86 MOV subFFTNum,#1 //after the last stage
// grpCount = 2 * subFFTSize, so the loop counter runs in doubled units.
87 LSL grpCount,subFFTSize,#1
89 // update subFFTSize for the next stage
90 MOV subFFTSize,grpCount
// dstStep = 16 - outPointStep. Together with the outPointStep post-increment
// of the first store, pDst advances by a net 16 bytes per loop iteration.
// NOTE(review): rsb is not a base A64 mnemonic; presumably it is supplied as
// a macro by one of the included headers - confirm.
92 rsb dstStep,outPointStep,#16
94 // Loop on 2 grps at a time for the last stage
// The label is made unique per expansion by the \name suffix.
96 radix2lsGrpLoop\name :
97 // dWr = [pTwiddle[0].Re, pTwiddle[1].Re]
98 // dWi = [pTwiddle[0].Im, pTwiddle[1].Im]
// ld2 de-interleaves real/imaginary parts; pTwiddle is post-incremented by 16.
99 ld2 {dWr,dWi},[pTwiddle], #16
101 // dXr0 = [pSrc[0].Re, pSrc[2].Re]
102 // dXi0 = [pSrc[0].Im, pSrc[2].Im]
103 // dXr1 = [pSrc[1].Re, pSrc[3].Re]
104 // dXi1 = [pSrc[1].Im, pSrc[3].Im]
// ld4 de-interleaves with stride 4; pSrc is post-incremented by 32.
105 ld4 {dXr0,dXi0,dXr1,dXi1}, [pSrc], #32
// Decrement the counter and set the flags consumed by the BGT at the bottom
// of the loop; none of the visible instructions in between modify the flags.
// The step is 4 because grpCount was initialised to 2 * subFFTSize.
107 SUBS grpCount,grpCount,#4 // grpCount is multiplied by 2
// Assemble-time selection on the macro argument: inverse transform first.
109 .ifeqs "\inverse", "TRUE"
// Inverse: qT0 += Wi * Xi1, qT1 -= Wi * Xr1.
// NOTE(review): this matches multiplication by the conjugate twiddle only if
// qT0/qT1 were first set to Wr * Xr1 and Wr * Xi1 - confirm against the full file.
111 fmla qT0,dWi,dXi1 // real part
113 fmls qT1,dWi,dXr1 // imag part
// Forward: the signs are swapped, qT0 -= Wi * Xi1, qT1 += Wi * Xr1.
118 fmls qT0,dWi,dXi1 // real part
120 fmla qT1,dWi,dXr1 // imag part
// st2 re-interleaves real/imaginary parts. The first store advances pDst by
// outPointStep; the second advances it by dstStep.
129 st2 {dYr0,dYi0},[pDst],outPointStep
130 st2 {dYr1,dYi1},[pDst],dstStep // dstStep = step = -outPointStep + 16
// Repeat while the SUBS result above was greater than zero (signed).
132 BGT radix2lsGrpLoop\name
// Forward-transform entry point: expands FFTSTAGE with scaled="FALSE",
// inverse="FALSE" and loop-label suffix "fwd".
// NOTE(review): M_START is presumably the function-prologue macro from
// arm64COMM_s.h, with d12 naming the highest FP register it preserves - confirm.
139 M_START armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace,,d12
140 FFTSTAGE "FALSE","FALSE",fwd
// Inverse-transform entry point: expands FFTSTAGE with scaled="FALSE",
// inverse="TRUE" and loop-label suffix "inv", which selects the inverse
// sign pattern inside the macro.
// NOTE(review): M_START is presumably the function-prologue macro from
// arm64COMM_s.h, with d12 naming the highest FP register it preserves - confirm.
145 M_START armSP_FFTInv_CToC_FC32_Radix2_ls_OutOfPlace,,d12
146 FFTSTAGE "FALSE","TRUE",inv