412b64fb59a3f1e65c0783b1e81e1b1a474ee6bc
[platform/framework/web/crosswalk.git] / src / third_party / openmax_dl / dl / sp / src / arm / neon / armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.S
1 @//
2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @//  Use of this source code is governed by a BSD-style license
5 @//  that can be found in the LICENSE file in the root of the source
6 @//  tree. An additional intellectual property rights grant can be found
7 @//  in the file PATENTS.  All contributing project authors may
8 @//  be found in the AUTHORS file in the root of the source tree.
9 @//
10 @//  This file was originally licensed as follows. It has been
11 @//  relicensed with permission from the copyright holders.
12 @//
13
14 @// 
15 @// File Name:  armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision:   7767
18 @// Last Modified Date:       Thu, 27 Sep 2007
19 @// 
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @// 
22 @// 
23 @//
24 @// Description:
25 @// Computes the first stage (a radix-4 stage) of an FFT over an N-point complex signal
26 @// 
27
28
29         
30 @// Include standard headers
31
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
34         
35 @// Import symbols required from other files
36 @// (For example tables)
37     
38         
39         
40         
41 @// Set debugging level        
42 @//DEBUG_ON    SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47     
48     
49     
50 @// Guarding implementation by the processor name
51     
52     
53 @//Input Registers
54 @// Core-register aliases used by FFTSTAGE. pTwiddle is defined but never referenced by the macro in this file; subFFTSize is only written by it.
55 #define pSrc            r0
56 #define pDst            r2
57 #define pTwiddle        r1
58 #define pPingPongBuf    r5
59 #define subFFTNum       r6
60 #define subFFTSize      r7
61
62
63 @//Output Registers
64 @// No separate aliases are declared: FFTSTAGE rewrites pSrc, pDst, subFFTNum and subFFTSize before it ends.
65
66 @//Local Scratch Registers
67 @// Two pairs of names share a register: grpSize/setCount (r3) and pointStep/outPointStep (r4).
68 #define grpSize         r3
69 @// Reuse grpSize as setCount
70 #define setCount        r3
71 #define pointStep       r4
72 #define outPointStep    r4
73 #define setStep         r8
74 #define step1           r9
75 #define step3           r10
76 @// pointStep, setStep, step1 and step3 hold the byte post-increments that FFTSTAGE passes to its VLD2/VST2 instructions.
77 @// Neon Registers
78 @// As used by FFTSTAGE: dX*/qX* = loaded inputs, dY*/qY* = first-level sums/differences, dZ*/qZ* = values stored through pDst.
79 #define dXr0    D0.S32
80 #define dXi0    D1.S32
81 #define dXr1    D2.S32
82 #define dXi1    D3.S32
83 #define dXr2    D4.S32
84 #define dXi2    D5.S32
85 #define dXr3    D6.S32
86 #define dXi3    D7.S32
87 #define dYr0    D8.S32
88 #define dYi0    D9.S32
89 #define dYr1    D10.S32
90 #define dYi1    D11.S32
91 #define dYr2    D12.S32
92 #define dYi2    D13.S32
93 #define dYr3    D14.S32
94 #define dYi3    D15.S32
95 #define qX0     Q0.S32
96 #define qX1     Q1.S32
97 #define qX2     Q2.S32
98 #define qX3     Q3.S32
99 #define qY0     Q4.S32
100 #define qY1     Q5.S32
101 #define qY2     Q6.S32
102 #define qY3     Q7.S32
103 #define dZr0    D16.S32
104 #define dZi0    D17.S32
105 #define dZr1    D18.S32
106 #define dZi1    D19.S32
107 #define dZr2    D20.S32
108 #define dZi2    D21.S32
109 #define dZr3    D22.S32
110 #define dZi3    D23.S32
111 #define qZ0     Q8.S32
112 #define qZ1     Q9.S32
113 #define qZ2     Q10.S32
114 #define qZ3     Q11.S32
115 @// Each q?n names the Q register that overlays the matching d?rn (low half) / d?in (high half) pair, e.g. qX0 = Q0 = D1:D0.
116     @// NOTE(review): D8-D15 (qY0-qY3) are callee-saved under the AAPCS; no save/restore is visible in this file - confirm M_START/M_END or the callers cover it.
117         .MACRO FFTSTAGE scaled, inverse, name
118         @// FFTSTAGE: expands to one first-stage radix-4 pass; the body contains no twiddle multiplies.
119         @//   scaled  - "TRUE" selects the halving forms VHADD/VHSUB for every add/sub, anything else VADD/VSUB
120         @//   inverse - "TRUE" swaps which of the two cross terms is stored 2nd and which is stored 4th
121         @//   name    - suffix appended to grpZeroSetLoop so that each expansion gets a distinct label
122         @// Reads pSrc, pDst, pPingPongBuf and subFFTNum; rewrites pSrc, pDst, subFFTNum and subFFTSize.
123         @// Note: outPointStep = pointStep for the first stage (both names are #defined to the same register)
124         @// Also writes setCount/grpSize, pointStep, setStep, step1, step3, Q0-Q11 and the condition flags.
125         MOV     pointStep,subFFTNum,LSL #1                  @// pointStep = 2*subFFTNum, the byte stride from data[k] to data[k+1]
126         
127         
128         @// Update subFFTSize and subFFTNum, interleaved with the loads of the first set
129         VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]: de-interleaving load, then pSrc += pointStep
130         MOV     subFFTSize,#4                                 @// subFFTSize = 4 on exit from the first stage
131         
132         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
133         LSR     grpSize,subFFTNum,#2
134         VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
135         MOV     subFFTNum,grpSize                           @// subFFTNum leaves the macro as subFFTNum>>2 of its entry value
136         
137                                        
138         @// Calculate the step of input data for the next set
139         @//MOV     setStep,pointStep,LSL #1
140         MOV     setStep,grpSize,LSL #4                      @// 16*(subFFTNum>>2); equals 2*pointStep when subFFTNum is a multiple of 4
141         VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
142         ADD     setStep,setStep,pointStep                   @// setStep = 3*pointStep
143         RSB     setStep,setStep,#16                         @// setStep = - 3*pointStep+16
144         @// Post-incrementing by setStep undoes three pointStep advances and moves on by 16 bytes (the amount one VLD2/VST2 here transfers)
145         VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
146         MOV     step1,pointStep,LSL #1                      @// step1 = 2*pointStep
147         @// Start the butterfly for the set just loaded (qY0 = qX0 + qX2); the loop body finishes it
148         .ifeqs "\scaled", "TRUE"
149             VHADD    qY0,qX0,qX2
150         .else
151             VADD    qY0,qX0,qX2
152         .endif
153             
154         RSB     step3,pointStep,#0                          @// step3 = -pointStep
155         
156         @// grp = 0 a special case since all the twiddle factors are 1
157         @// Loop on the sets : 2 sets at a time
158
159 grpZeroSetLoop\name :   
160         @// On entry to each pass qX0-qX3 hold the current inputs and qY0 already holds qX0 + qX2.
161         @// The pass finishes this butterfly, stores four outputs, and meanwhile reloads qX0-qX3 for the following pass.
162         @// NOTE(review): the last pass still issues those four loads although their data is not used here - confirm the reads stay inside the source buffer.
163         @// Decrement setcount
164         SUBS    setCount,setCount,#2                    @// decrement the set loop counter; the flags are consumed by the BGT at the bottom
165         
166         .ifeqs "\scaled", "TRUE" 
167         @// Scaled variants: halving add/sub at both butterfly levels
168             @// finish first stage of 4 point FFT 
169                         
170             VHSUB    qY2,qX0,qX2
171             @// qX0/qX2 are no longer needed once qY2 is formed, so they are reloaded first
172             VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
173             VHADD    qY1,qX1,qX3
174             VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
175             VHSUB    qY3,qX1,qX3
176             
177                        
178             @// finish second stage of 4 point FFT 
179                                                 
180             .ifeqs "\inverse", "TRUE"
181                    @// scaled inverse: store order is Z0, Z3, Z1, Z2
182                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
183                 VHADD    qZ0,qY0,qY1
184             
185                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set    
186                 VHSUB    dZr3,dYr2,dYi3
187                 
188                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
189                 VHADD    dZi3,dYi2,dYr3
190                 
191                 VHSUB    qZ1,qY0,qY1
192                 VST2    {dZr3,dZi3},[pDst :128],outPointStep
193                 
194                 VHADD    dZr2,dYr2,dYi3
195                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
196                 VHSUB    dZi2,dYi2,dYr3
197                 
198                 VHADD    qY0,qX0,qX2                     @// u0 for next iteration
199                 VST2    {dZr2,dZi2},[pDst :128],setStep            @// last store also steps pDst back by 3*outPointStep and forward by 16 bytes
200                 
201                 
202             .else
203                 @// scaled forward: same arithmetic, store order is Z0, Z2, Z1, Z3
204                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
205                 VHADD    qZ0,qY0,qY1
206             
207                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
208                 VHADD    dZr2,dYr2,dYi3
209             
210                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
211                 VHSUB    dZi2,dYi2,dYr3
212             
213                 VHSUB    qZ1,qY0,qY1
214                 VST2    {dZr2,dZi2},[pDst :128],outPointStep
215             
216                 VHSUB    dZr3,dYr2,dYi3
217                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
218                 VHADD    dZi3,dYi2,dYr3
219             
220                 VHADD    qY0,qX0,qX2                     @// u0 for next iteration
221                 VST2    {dZr3,dZi3},[pDst :128],setStep            @// last store also steps pDst back by 3*outPointStep and forward by 16 bytes
222             
223             .endif
224             
225         
226         
227         .else
228         @// Unscaled variants: plain VADD/VSUB, otherwise the same schedule as above
229             @// finish first stage of 4 point FFT 
230             
231             
232             VSUB    qY2,qX0,qX2
233             
234             VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0]
235             VADD    qY1,qX1,qX3
236             VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2]
237             VSUB    qY3,qX1,qX3
238             
239                        
240             @// finish second stage of 4 point FFT 
241                                                 
242             .ifeqs "\inverse", "TRUE" 
243                    @// unscaled inverse: store order is Z0, Z3, Z1, Z2
244                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
245                 VADD    qZ0,qY0,qY1
246             
247                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set    
248                 VSUB    dZr3,dYr2,dYi3
249                 
250                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
251                 VADD    dZi3,dYi2,dYr3
252                 
253                 VSUB    qZ1,qY0,qY1
254                 VST2    {dZr3,dZi3},[pDst :128],outPointStep
255                 
256                 VADD    dZr2,dYr2,dYi3
257                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
258                 VSUB    dZi2,dYi2,dYr3
259                 
260                 VADD    qY0,qX0,qX2                     @// u0 for next iteration
261                 VST2    {dZr2,dZi2},[pDst :128],setStep            @// last store also steps pDst back by 3*outPointStep and forward by 16 bytes
262                 
263                 
264             .else
265                 @// unscaled forward: store order is Z0, Z2, Z1, Z3
266                 VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1]
267                 VADD    qZ0,qY0,qY1
268             
269                 VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3] & update pSrc for the next set
270                 VADD    dZr2,dYr2,dYi3
271             
272                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
273                 VSUB    dZi2,dYi2,dYr3
274             
275                 VSUB    qZ1,qY0,qY1
276                 VST2    {dZr2,dZi2},[pDst :128],outPointStep
277             
278                 VSUB    dZr3,dYr2,dYi3
279                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
280                 VADD    dZi3,dYi2,dYr3
281             
282                 VADD    qY0,qX0,qX2                     @// u0 for next iteration
283                 VST2    {dZr3,dZi3},[pDst :128],setStep            @// last store also steps pDst back by 3*outPointStep and forward by 16 bytes
284             
285             .endif
286             
287         .endif
288         
289         BGT     grpZeroSetLoop\name                     @// repeat while setCount > 0; flags come from the SUBS at the top of the pass
290         
291         @// reset pSrc to pDst for the next stage
292         SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep (each pass advanced pDst by a net 16 bytes)
293         MOV     pDst,pPingPongBuf                       @// pDst = pPingPongBuf (presumably the output buffer for the next stage - confirm with the caller)
294         
295         
296         .endm
297
298                 @// Forward, unscaled entry point: FFTSTAGE with scaled="FALSE", inverse="FALSE", loop label suffix "fwd".
299         @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); confirm there what the r4 argument makes them save/restore.
300         M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
301         FFTSTAGE "FALSE","FALSE",fwd
302         M_END
303
304         @// Inverse, unscaled entry point: FFTSTAGE with scaled="FALSE", inverse="TRUE", loop label suffix "inv".
305         @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); confirm there what the r4 argument makes them save/restore.
306         M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
307         FFTSTAGE "FALSE","TRUE",inv
308         M_END
309  @// Forward, scaled (Sfs) entry point: FFTSTAGE with scaled="TRUE" (halving adds/subs), inverse="FALSE", loop label suffix "fwdsfs".
310                 @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); confirm there what the r4 argument makes them save/restore.
311         M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
312         FFTSTAGE "TRUE","FALSE",fwdsfs
313         M_END
314 @// Inverse, scaled (Sfs) entry point: FFTSTAGE with scaled="TRUE" (halving adds/subs), inverse="TRUE", loop label suffix "invsfs".
315                 @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); confirm there what the r4 argument makes them save/restore.
316         M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
317         FFTSTAGE "TRUE","TRUE",invsfs
318         M_END
319     
320         .end