Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / openmax_dl / dl / sp / src / arm / neon / armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.S
1 @//
2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @//  Use of this source code is governed by a BSD-style license
5 @//  that can be found in the LICENSE file in the root of the source
6 @//  tree. An additional intellectual property rights grant can be found
7 @//  in the file PATENTS.  All contributing project authors may
8 @//  be found in the AUTHORS file in the root of the source tree.
9 @//
10 @//  This file was originally licensed as follows. It has been
11 @//  relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name:  armSP_FFT_CToC_SC16_Radix4_fs_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision:   7761
18 @// Last Modified Date:       Wed, 26 Sep 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute the first radix-4 stage of an FFT for an N-point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
34
35 @// Import symbols required from other files
36 @// (For example tables)
37
38
39
40
41 @// Set debugging level
42 @//DEBUG_ON    SETL {TRUE}
43
44
45
46 @// Guarding implementation by the processor name
47
48
49
50 @// Guarding implementation by the processor name
51
52
53 @//Input Registers (core registers FFTSTAGE expects to be set up by its caller)
54 @// pSrc, pDst, pPingPongBuf and subFFTNum are read; subFFTSize is only written (set to 4); pTwiddle is named but never referenced in this file.
55 #define pSrc                            r0
56 #define pDst                            r2
57 #define pTwiddle                        r1
58 #define pPingPongBuf                    r5
59 #define subFFTNum                       r6
60 #define subFFTSize                      r7
61
62
63 @//Output Registers
64
65
66 @//Local Scratch Registers (all of them are overwritten by FFTSTAGE)
67 @// Two pairs deliberately share one register: grpSize/setCount (r3) and pointStep/outPointStep (r4).
68 #define grpSize                         r3
69 @// Reuse grpSize as setCount
70 #define setCount                        r3
71 #define pointStep                       r4
72 #define outPointStep                    r4
73 #define setStep                         r8
74 #define step1                           r9
75 #define step3                           r10
76
77 @// Neon Registers (NOTE(review): D8-D15 are overwritten by FFTSTAGE and nothing in this file saves them; presumably the caller of these _unsafe routines preserves them - confirm)
78 @// dX* = butterfly inputs, dY* = first-level sums and differences, dZ* = outputs; r = real lanes, i = imaginary lanes, four S16 lanes per D register.
79 #define dXr0                            D0.S16
80 #define dXi0                            D1.S16
81 #define dXr1                            D2.S16
82 #define dXi1                            D3.S16
83 #define dXr2                            D4.S16
84 #define dXi2                            D5.S16
85 #define dXr3                            D6.S16
86 #define dXi3                            D7.S16
87 #define dYr0                            D8.S16
88 #define dYi0                            D9.S16
89 #define dYr1                            D10.S16
90 #define dYi1                            D11.S16
91 #define dYr2                            D12.S16
92 #define dYi2                            D13.S16
93 #define dYr3                            D14.S16
94 #define dYi3                            D15.S16
95 #define dZr0                            D16.S16
96 #define dZi0                            D17.S16
97 #define dZr1                            D18.S16
98 #define dZi1                            D19.S16
99 #define dZr2                            D20.S16
100 #define dZi2                            D21.S16
101 #define dZr3                            D22.S16
102 #define dZi3                            D23.S16
103 #define qY0                             Q4.S16
104 #define qY2                             Q6.S16
105 #define qX0                             Q0.S16
106 #define qX2                             Q2.S16
107 @// Each Q alias overlays the matching real/imaginary D pair, e.g. qX0 = {dXr0,dXi0}, qY3 = {dYr3,dYi3}, qZ1 = {dZr1,dZi1}; dZ2 and dZ3 have no Q alias.
108 #define qY1                             Q5.S16
109 #define qY3                             Q7.S16
110 #define qX1                             Q1.S16
111 #define qX3                             Q3.S16
112 #define qZ0                             Q8.S16
113 #define qZ1                             Q9.S16
114 @// FFTSTAGE scaled, inverse, name: first-stage radix-4 butterfly pass (all twiddles equal 1) shared by the four entry points of this file.
115 @//   scaled = TRUE uses halving VHADD/VHSUB at both butterfly levels (two halvings per output); any other value uses plain VADD/VSUB.
116         .macro FFTSTAGE scaled, inverse, name
117         @//   inverse = TRUE stores u1 + j*u3 second and u1 - j*u3 fourth; otherwise the two are stored the other way round. name is appended to the loop label.
118         @// Reads pSrc, pDst, pPingPongBuf, subFFTNum. Writes pSrc, pDst, subFFTNum, subFFTSize. Clobbers r3, r4, r8-r10, Q0-Q11 and the condition flags.
119         @// pointStep is the byte post-increment between the four butterfly inputs (and between the four outputs)
120         MOV     pointStep,subFFTNum
121         @// subFFTNum and subFFTSize are rewritten further down
122         @// Pipeline prologue: data[0], data[1], data[2] and u0 of the first set are prepared before the loop is entered.
123         @// Each VLD2 de-interleaves four complex S16 values into one real and one imaginary D register.
124         VLD2    {dXr0,dXi0},[pSrc :128],pointStep          @//  data[0]
125         @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
126         LSR     grpSize,subFFTNum,#2
127         MOV     subFFTNum,grpSize                     @// subFFTNum is left at a quarter of its entry value
128
129
130         @// One complex SC16 element occupies 4 bytes (two S16 values)
131         @// pointStep is applied directly as a byte post-increment by the VLD2/VST2 instructions
132         @// Note: outPointStep = pointStep for firststage (both names map to r4)
133         VLD2    {dXr1,dXi1},[pSrc :128],pointStep          @//  data[1]
134
135
136         @// Calculate the step of input data for the next set
137         @//MOV     setStep,pointStep,LSL #1
138         MOV     setStep,grpSize,LSL #3                @// setStep = 8*grpSize = 2*pointStep
139         VLD2    {dXr2,dXi2},[pSrc :128],pointStep          @//  data[2]
140         MOV     step1,setStep                         @// step1 = 2*pointStep
141         ADD     setStep,setStep,pointStep             @// setStep = 3*pointStep
142         RSB     setStep,setStep,#16                   @// setStep = - 3*pointStep+16
143         @// setStep rewinds from the fourth quarter to the first one and advances 16 bytes (four complex values).
144         @// Inside the loop step1 and step3 walk pSrc through data[0], data[2], data[1] and finally data[3] of the next set.
145         MOV     subFFTSize,#4                         @// subFFTSize = 4 on exit from the first stage
146
147
148         .ifeqs  "\scaled", "TRUE"
149             VHADD    qY0,qX0,qX2             @// u0 = (data[0] + data[2]) >> 1
150         .else
151             VADD   qY0,qX0,qX2               @// u0 = data[0] + data[2]
152         .endif
153         RSB     step3,pointStep,#0                    @// step3 = -pointStep
154
155         @// grp = 0 a special case since all the twiddle factors are 1
156         @// Loop on the sets: 4 sets at a time
157         @// At the loop top X0, X1, X2 and u0 already belong to the current set and pSrc points at its data[3].
158 grpZeroSetLoop\name:
159
160         VLD2    {dXr3,dXi3},[pSrc :128],setStep            @//  data[3]; pSrc moves on to data[0] of the next set
161
162         .ifeqs "\scaled", "TRUE"
163
164             @// finish first stage of 4 point FFT
165
166             VHSUB    qY2,qX0,qX2             @// u1 = (data[0] - data[2]) >> 1
167             SUBS    setCount,setCount,#4                    @// decrement the set loop counter; the flags are consumed by the BGT at the bottom
168             @// The loads below fetch the NEXT set; each X register is overwritten only after its last use for the current set.
169             VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0] (next set)
170             VHADD    qY1,qX1,qX3             @// u2 = (data[1] + data[3]) >> 1
171             VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2] (next set)
172             VHSUB    qY3,qX1,qX3             @// u3 = (data[1] - data[3]) >> 1
173
174
175
176             @// finish second stage of 4 point FFT
177
178             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1] (next set)
179             VHADD    qZ0,qY0,qY1             @// y0 = (u0 + u2) >> 1
180
181             .ifeqs  "\inverse", "TRUE"
182             @// Inverse store order: y0, y3, y2, y1
183                 VHSUB    dZr3,dYr2,dYi3                  @// y3 = u1 + j*u3 (halved): real part
184                 VHADD    dZi3,dYi2,dYr3                  @// y3 imaginary part
185                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
186
187                 VHSUB    qZ1,qY0,qY1                     @// y2 = (u0 - u2) >> 1
188                 VST2    {dZr3,dZi3},[pDst :128],outPointStep
189
190                 VHADD    dZr2,dYr2,dYi3                  @// y1 = u1 - j*u3 (halved): real part
191                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
192                 VHSUB    dZi2,dYi2,dYr3                  @// y1 imaginary part
193
194                 VHADD    qY0,qX0,qX2                     @// u0 (next loop)
195                 VST2    {dZr2,dZi2},[pDst :128],setStep  @// last store rewinds pDst and advances it 16 bytes
196
197
198             .else
199             @// Forward store order: y0, y1, y2, y3
200                 VHADD    dZr2,dYr2,dYi3                  @// y1 = u1 - j*u3 (halved): real part
201                 VHSUB    dZi2,dYi2,dYr3                  @// y1 imaginary part
202
203                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
204                 VHSUB    qZ1,qY0,qY1                     @// y2 = (u0 - u2) >> 1
205
206                 VST2    {dZr2,dZi2},[pDst :128],outPointStep
207                 VHSUB    dZr3,dYr2,dYi3                  @// y3 = u1 + j*u3 (halved): real part
208                 VHADD    dZi3,dYi2,dYr3                  @// y3 imaginary part
209                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
210                 VHADD    qY0,qX0,qX2                     @// u0 (next loop)
211                 VST2    {dZr3,dZi3},[pDst :128],setStep  @// last store rewinds pDst and advances it 16 bytes
212
213             .endif
214
215
216         .else
217
218             @// finish first stage of 4 point FFT
219
220             VSUB    qY2,qX0,qX2             @// u1 = data[0] - data[2]
221             SUBS    setCount,setCount,#4                    @// decrement the set loop counter; the flags are consumed by the BGT at the bottom
222             @// The loads below fetch the NEXT set; each X register is overwritten only after its last use for the current set.
223             VLD2    {dXr0,dXi0},[pSrc :128],step1          @//  data[0] (next set)
224             VADD    qY1,qX1,qX3             @// u2 = data[1] + data[3]
225             VLD2    {dXr2,dXi2},[pSrc :128],step3          @//  data[2] (next set)
226             VSUB    qY3,qX1,qX3             @// u3 = data[1] - data[3]
227
228
229
230             @// finish second stage of 4 point FFT
231
232             VLD2    {dXr1,dXi1},[pSrc :128],step1          @//  data[1] (next set)
233             VADD    qZ0,qY0,qY1             @// y0 = u0 + u2
234
235             .ifeqs  "\inverse", "TRUE"
236             @// Inverse store order: y0, y3, y2, y1
237                 VSUB    dZr3,dYr2,dYi3                  @// y3 = u1 + j*u3: real part
238                 VADD    dZi3,dYi2,dYr3                  @// y3 imaginary part
239                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
240
241                 VSUB    qZ1,qY0,qY1                     @// y2 = u0 - u2
242                 VST2    {dZr3,dZi3},[pDst :128],outPointStep
243
244                 VADD    dZr2,dYr2,dYi3                  @// y1 = u1 - j*u3: real part
245                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
246                 VSUB    dZi2,dYi2,dYr3                  @// y1 imaginary part
247
248                 VADD    qY0,qX0,qX2                     @// u0 (next loop)
249                 VST2    {dZr2,dZi2},[pDst :128],setStep @// last store rewinds pDst and advances it 16 bytes
250
251
252             .else
253             @// Forward store order: y0, y1, y2, y3
254                 VADD    dZr2,dYr2,dYi3                  @// y1 = u1 - j*u3: real part
255                 VSUB    dZi2,dYi2,dYr3                  @// y1 imaginary part
256
257                 VST2    {dZr0,dZi0},[pDst :128],outPointStep
258                 VSUB    qZ1,qY0,qY1                     @// y2 = u0 - u2
259
260                 VST2    {dZr2,dZi2},[pDst :128],outPointStep
261                 VSUB    dZr3,dYr2,dYi3                  @// y3 = u1 + j*u3: real part
262                 VADD    dZi3,dYi2,dYr3                  @// y3 imaginary part
263                 VST2    {dZr1,dZi1},[pDst :128],outPointStep
264                 VADD    qY0,qX0,qX2                     @// u0 (next loop)
265                 VST2    {dZr3,dZi3},[pDst :128],setStep @// last store rewinds pDst and advances it 16 bytes
266
267             .endif
268
269
270         .endif
271         @// Repeat while the counter left by SUBS is still positive
272         BGT     grpZeroSetLoop\name
273
274
275         @// reset pSrc to pDst for the next stage
276         SUB     pSrc,pDst,pointStep                     @// pSrc = pDst - pointStep: pDst moved 16 bytes per iteration, so this is its entry value when subFFTNum was a multiple of 16
277         MOV     pDst,pPingPongBuf                       @// pDst = pPingPongBuf
278
279
280         .endm
281
282 @// Forward first stage, unscaled: FFTSTAGE with scaled=FALSE and inverse=FALSE; loop label suffix FWD.
283 @// NOTE(review): M_START/M_END come from armCOMM_s.h (not visible here); the r4 argument presumably asks the prologue to preserve r4 - confirm.
284         M_START armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
285         FFTSTAGE "FALSE","FALSE",FWD
286         M_END
287
288
289 @// Inverse first stage, unscaled: FFTSTAGE with scaled=FALSE and inverse=TRUE; loop label suffix INV.
290         M_START armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe,r4
291         FFTSTAGE "FALSE","TRUE",INV
292         M_END
293
294 @// Forward first stage, scaled (halving adds and subtracts): FFTSTAGE with scaled=TRUE and inverse=FALSE; loop label suffix FWDSFS.
295         M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
296         FFTSTAGE "TRUE","FALSE",FWDSFS
297         M_END
298
299 @// Inverse first stage, scaled (halving adds and subtracts): FFTSTAGE with scaled=TRUE and inverse=TRUE; loop label suffix INVSFS.
300         M_START armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
301         FFTSTAGE "TRUE","TRUE",INVSFS
302         M_END
303
304
305
306
307
308     .end