Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / openmax_dl / dl / sp / src / arm / neon / omxSP_FFTFwd_CToC_SC16_Sfs_s.S
1 @//
2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @//  Use of this source code is governed by a BSD-style license
5 @//  that can be found in the LICENSE file in the root of the source
6 @//  tree. An additional intellectual property rights grant can be found
7 @//  in the file PATENTS.  All contributing project authors may
8 @//  be found in the AUTHORS file in the root of the source tree.
9 @//
10 @//  This file was originally licensed as follows. It has been
11 @//  relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name:  omxSP_FFTFwd_CToC_SC16_Sfs_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision:   6729
18 @// Last Modified Date:       Tue, 17 Jul 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a forward FFT for a complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
40         .extern  armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
41         .extern  armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
42         .extern  armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
43         .extern  armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
44         .extern  armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
45         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
46         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
47         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
48         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
49         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
50         .extern  armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
51         .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
52         .extern  armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
53
54 @// Set debugging level
55 @//DEBUG_ON    SETL {TRUE}
56
57
58
59 @// Guarding implementation by the processor name
60
61
62
63 @// Guarding implementation by the processor name
64
65
66     .extern  armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
67     .extern  armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
68
69 @// Input registers (r0-r3, as read on entry to omxSP_FFTFwd_CToC_SC16_Sfs)
70
71 #define pSrc            r0
72 #define pDst            r1
73 #define pFFTSpec                r2
74 #define scale           r3
75
76
77 @// Output registers
78 #define result          r0
79
80 @// Local scratch registers
@// NOTE: these are preprocessor aliases, and several names share one register:
@//   r1 : pDst / argTwiddle
@//   r2 : pFFTSpec / argDst / diffMinusOne
@//   r3 : scale / round
@//   r4 : argScale / pTwiddle / tmpOrder / x0r
@//   r5 : pOut / x0i
@//   r6 : subFFTNum / N
@// Writing one alias therefore overwrites every other name on the same register.
@// order is r14 (LR), so its value is lost at the first BL.
@// tmpOrder, x0i, diffMinusOne and round are not referenced in the code
@// visible in this file.
@// Keep comments off the #define lines themselves: text after the register
@// name would become part of the macro expansion.
81 #define argTwiddle              r1
82 #define argDst          r2
83 #define argScale                r4
84 #define pTwiddle                r4
85 #define tmpOrder                r4
86 #define pOut            r5
87 #define subFFTSize              r7
88 #define subFFTNum               r6
89 #define N               r6
90 #define order           r14
91 #define diff            r9
92 @// Total number of radix stages required to complete the FFT
93 #define count           r8
94 #define x0r             r4
95 #define x0i             r5
96 #define diffMinusOne            r2
97 #define round           r3
98
99 @// Neon registers
@// dX0 and dX0S32 are two typed views of the same register D0
@// (16-bit lanes and 32-bit lanes respectively); dShift is D1 as 16-bit lanes.
100
101 #define dX0     D0.S16
102 #define dShift  D1.S16
103 #define dX0S32  D0.S32
104
105
106
@// ---------------------------------------------------------------------------
@// omxSP_FFTFwd_CToC_SC16_Sfs
@//
@// Driver routine: it performs no butterflies itself. It reads N, the twiddle
@// pointer and the work buffer pointer from the FFT spec, derives
@// order = 31 - CLZ(N), and then chains the external
@// armSP_FFTFwd_CToC_SC16_* stage routines according to order and scale.
@//
@// In:   r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
@// Out:  r0 = OMX_Sts_NoErr (the only value this code ever returns)
@//
@// Stage selection as implemented below:
@//   order == 0                  : copy one 32-bit word pSrc -> pDst, go to FFTEnd
@//   order == 1..3               : radix-2 stages fs / (ps) / ls; stage k uses the
@//                                 Sfs (scaled) routine when k <= min(scale, order)
@//   order > 3, scale == 0       : unscaled Radix4_fs (even order) or Radix8_fs
@//                                 (odd order), then Radix4 stages, then Radix4_ls
@//   order > 3, scale >= order   : the same chain using the Sfs routines
@//   order > 3, 0 < scale < order: scale Sfs radix-2 stages, followed by unscaled
@//                                 radix-4 stages when (order - scale) is even or
@//                                 unscaled radix-2 stages when it is odd
@//
@// FFTEnd applies one extra right shift (VRSHL with a negated count) when the
@// value kept in diffOnStack is positive. That value is scale - order on the
@// order 1..3 and scale >= order paths, and scale itself when order == 0.
@// The 0 < scale < order paths branch to End and skip FFTEnd.
@//
@// M_ALLOC4, M_START, M_STR, M_LDR and M_END are macros defined outside this
@// file (presumably in dl/api/arm/armCOMM_s.h - confirm there).
@//
@// NOTE(review): the conditional BL pairs (BLEQ/BLLT, BLGE/BLLT, BLEQ/BLNE)
@// re-test the flags after the first callee has returned, so they depend on the
@// flags each callee leaves behind. The callees are not visible in this file.
@// NOTE(review): FFTEnd takes its data pointer from r0 and its element count
@// from subFFTSize (r7); on every path except order == 0 both are whatever the
@// last stage routine returned - verify against the stage routines.
@// ---------------------------------------------------------------------------
107     @// Allocate stack memory required by the function
108         M_ALLOC4        diffOnStack, 4 @// local slot named diffOnStack, accessed below via M_STR / M_LDR
109
110     @// Write function header (r11 and d15 presumably name the highest registers M_START preserves - confirm)
111         M_START     omxSP_FFTFwd_CToC_SC16_Sfs,r11,d15
112
113 @ Structure offsets for the FFTSpec
114         .set    ARMsFFTSpec_N, 0
115         .set    ARMsFFTSpec_pBitRev, 4 @// not referenced in this file
116         .set    ARMsFFTSpec_pTwiddle, 8
117         .set    ARMsFFTSpec_pBuf, 12
118
119         @// Define stack arguments
120
121         @// Read the size from structure and take log
122         LDR     N, [pFFTSpec, #ARMsFFTSpec_N]
123
124         @// Read other structure parameters
125         LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
126         LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]
127
128         CLZ     order,N                             @// N = 2^order
129         RSB     order,order,#31 @// order = 31 - CLZ(N); order is r14 (LR), so it is only valid until the first BL
130         MOV     subFFTSize,#1
131         @//MOV     subFFTNum,N  (no move needed: subFFTNum and N are both r6)
132
133         CMP     order,#3
134         BGT     orderGreaterthan3                   @// order > 3
135
136         CMP     order,#1
137         BGE     orderGreaterthan0                   @// order > 0
138         M_STR   scale, diffOnStack,LT               @// order = 0: FFTEnd will shift by the full scale
139         LDRLT   x0r,[pSrc] @// order = 0: copy the single 32-bit input word
140         STRLT   x0r,[pDst]
141         MOVLT   pSrc,pDst @// FFTEnd scales in place through r0
142         BLT     FFTEnd
143
144 orderGreaterthan0:
145         @// set the buffers appropriately for various orders
146         CMP     order,#2
147         MOVNE   argDst,pDst @// order 1 or 3 (r2 no longer holds pFFTSpec from here on)
148         MOVEQ   argDst,pOut @// order 2: r2 = work buffer from the spec
149         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
150         MOV     argTwiddle,pTwiddle @// r1 = twiddle pointer; pDst now survives only in r2 or r5
151
152         SUBS     diff,scale,order @// diff = scale - order; flags are consumed by MOVGT below
153         M_STR   diff,diffOnStack @// assumed not to alter the flags - confirm in the macro
154         MOVGT   scale,order
155         @// Now scale <= order
156
157         CMP     order,#1
158         BGT     orderGreaterthan1
159         SUBS    scale,scale,#1 @// scale <= 1 here: EQ when scale was 1, LT when it was below 1
160         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
161         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe      @// order = 1
162         B       FFTEnd
163
164 orderGreaterthan1:
165         CMP     order,#2
166         MOV     argScale,scale @// r4 is free again (twiddle pointer already copied to r1); MOV keeps the CMP flags
167         BGT     orderGreaterthan2
168         SUBS    argScale,argScale,#1 @// GE when scale >= 1: scaled first stage
169         BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe      @// order =2
170         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
171         SUBS    argScale,argScale,#1 @// EQ when scale == 2: scaled last stage (needs r4 preserved by the callee)
172         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
173         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
174         B       FFTEnd
175
176 orderGreaterthan2:                                                                     @// order =3
177         SUBS    argScale,argScale,#1 @// GE when scale >= 1: scaled first stage
178         BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
179         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
180         SUBS    argScale,argScale,#1 @// GE when scale >= 2: scaled middle (ps) stage
181         BLGE    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
182         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
183         SUBS    argScale,argScale,#1 @// EQ when scale == 3: scaled last stage
184         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
185         BLLT    armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
186         B       FFTEnd
187
188
189 orderGreaterthan3:
190         @// check scale = 0 or scale = order
191         SUBS    diff, scale, order                 @// diff = scale - order (GT when scale > order)
192         MOVGT   scale,order @// clamp scale to order
193         BGE     specialScaleCase                   @// scale >= order (scale is now clamped to order)
194         CMP     scale,#0
195         BEQ     specialScaleCase
196         B       generalScaleCase
197
198 specialScaleCase:                                           @//  scale = 0 or scale = order  and order > 3
199
200         TST     order, #2                           @// Set input args to fft stages (bit 1 of order picks the first destination)
201         MOVNE   argDst,pDst
202         MOVEQ   argDst,pOut
203         MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
204         MOV     argTwiddle,pTwiddle
205
206         CMP      diff,#0
207         M_STR    diff, diffOnStack @// remaining shift for FFTEnd (ignored there when <= 0)
208         BGE      scaleEqualsOrder @// diff < 0 falls through; on this path that only happens with scale == 0
209
210         @//check for even or odd order
211         @// NOTE: The following combination of BL's would work fine eventhough the first
212         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
213         @// armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
214
215         TST     order,#0x00000001 @// last use of order: the BL below overwrites r14
216         BLEQ    armSP_FFTFwd_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
217         BLNE    armSP_FFTFwd_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
218
219         CMP        subFFTNum,#4 @// r6 as left by the first stage routine
220         BLT     FFTEnd
221
222 unscaledRadix4Loop:
223         BEQ        lastStageUnscaledRadix4 @// subFFTNum == 4: only the last stage remains
224         BL        armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
225          CMP        subFFTNum,#4
226          B        unscaledRadix4Loop
227
228 lastStageUnscaledRadix4:
229         BL      armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
230         B        FFTEnd
231
232 scaleEqualsOrder:
233         @//check for even or odd order
234         @// NOTE: The following combination of BL's would work fine eventhough the first
235         @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
236         @// armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
237
238         TST     order,#0x00000001 @// last use of order: the BL below overwrites r14
239         BLEQ    armSP_FFTFwd_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
240         BLNE    armSP_FFTFwd_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
241
242         CMP        subFFTNum,#4 @// r6 as left by the first stage routine
243         BLT     FFTEnd
244
245 scaledRadix4Loop:
246         BEQ        lastStageScaledRadix4 @// subFFTNum == 4: only the last stage remains
247         BL        armSP_FFTFwd_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
248          CMP        subFFTNum,#4
249          B        scaledRadix4Loop
250
251 lastStageScaledRadix4:
252         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
253         B        FFTEnd
254
255
256
257 generalScaleCase:                                               @// 0 < scale < order and order > 3
258         @// Determine the correct destination buffer
259         SUB     diff,order,scale @// diff = order - scale (> 0); note the sign is opposite to the other paths
260         TST     diff,#0x01
261         ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
262         MOVNE   count,order @// odd diff: every stage is radix-2, so count = order
263         TST     count,#0x01                     @// Is count even or odd ?
264
265         MOVNE   argDst,pDst                     @// Set input args to fft stages
266         MOVEQ   argDst,pOut
267         MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
268         MOV     argTwiddle,pTwiddle
269
270         CMP     diff,#1
271         M_STR   diff, diffOnStack @// re-read at outScale; assumed not to alter the CMP flags - confirm in the macro
272         BEQ     scaleps                         @// scaling including a radix2_ps stage
273
274         MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
275         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
276         SUBS    argScale,argScale,#1 @// scaled stages still to run
277
278 scaledRadix2Loop:
279         BLGT    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
280         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
281         BGT     scaledRadix2Loop
282         B       outScale
283
284 scaleps:
285         SUB     argScale,scale,#1                   @// order>3 and diff=1 => scale >= 3
286         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
287         SUBS    argScale,argScale,#1 @// argScale = scale - 2: number of plain Sfs_Radix2 stages before the ps stage
288
289 scaledRadix2psLoop:
290         BEQ     scaledRadix2psStage @// counter reached zero: run the scaled ps stage
291         BLGT    armSP_FFTFwd_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
292         SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
293         BGE     scaledRadix2psLoop
294
295 scaledRadix2psStage:
296         BL      armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
297         B       generalLastStageUnscaledRadix2 @// the last stage is the only unscaled one when diff = 1
298
299
300 outScale:
301         M_LDR   diff, diffOnStack @// diff = order - scale, as stored in generalScaleCase
302         @//check for even or odd order
303         TST     diff,#0x00000001
304         BEQ     generalUnscaledRadix4Loop @// even number of remaining radix-2 levels: finish with radix-4 stages
305         B       unscaledRadix2Loop
306
307 generalUnscaledRadix4Loop:
308         CMP        subFFTNum,#4
309          BEQ        generalLastStageUnscaledRadix4
310          BL        armSP_FFTFwd_CToC_SC16_Radix4_OutOfPlace_unsafe
311          B        generalUnscaledRadix4Loop
312
313 generalLastStageUnscaledRadix4:
314         BL      armSP_FFTFwd_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
315         B        End @// skips FFTEnd: no extra shift on the 0 < scale < order paths
316
317 unscaledRadix2Loop:
318         CMP        subFFTNum,#4
319          BEQ        generalLastTwoStagesUnscaledRadix2
320          BL        armSP_FFTFwd_CToC_SC16_Radix2_OutOfPlace_unsafe
321          B        unscaledRadix2Loop
322
323 generalLastTwoStagesUnscaledRadix2:
324         BL      armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
325 generalLastStageUnscaledRadix2:
326         BL      armSP_FFTFwd_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
327         B        End @// skips FFTEnd: no extra shift on the 0 < scale < order paths
328
329
330 FFTEnd:                                               @// Does only the scaling
331
332         M_LDR   diff, diffOnStack
333         CMP     diff,#0
334         BLE     End @// nothing left to shift
335
336         RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
337         VDUP    dShift,diff @// copy the negated shift count into every 16-bit lane of D1
338
339 scaleFFTData:                                           @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
340         VLD1    {dX0S32[0]},[pSrc]                        @// pSrc contains pDst pointer
341         SUBS    subFFTSize,subFFTSize,#1 @// flags are consumed by the BGT at the bottom of the loop
342         VRSHL   dX0,dShift @// shift the 16-bit lanes of D0 by the (negative) count in D1
343         VST1    {dX0S32[0]},[pSrc]! @// store the 32-bit lane back in place and advance the pointer
344
345         BGT     scaleFFTData
346
347
348
349 End:
350         @// Set return value
351         MOV     result, #OMX_Sts_NoErr
352
353         @// Write function tail
354         M_END
355
356     .end