Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / openmax_dl / dl / sp / src / arm / neon / armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.S
1 @//
2 @//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 @//
4 @//  Use of this source code is governed by a BSD-style license
5 @//  that can be found in the LICENSE file in the root of the source
6 @//  tree. An additional intellectual property rights grant can be found
7 @//  in the file PATENTS.  All contributing project authors may
8 @//  be found in the AUTHORS file in the root of the source tree.
9 @//
10 @//  This file was originally licensed as follows. It has been
11 @//  relicensed with permission from the copyright holders.
12
13 @//
14 @//
15 @// File Name:  armSP_FFT_CToC_SC16_Radix2_ps_unsafe_s.s
16 @// OpenMAX DL: v1.0.2
17 @// Last Modified Revision:   6740
18 @// Last Modified Date:       Wed, 18 Jul 2007
19 @//
20 @// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
21 @//
22 @//
23 @//
24 @// Description:
25 @// Compute a Radix 2 FFT stage for a N point complex signal
26 @//
27 @//
28
29
30 @// Include standard headers
31
32 #include "dl/api/arm/armCOMM_s.h"
33 #include "dl/api/arm/omxtypes_s.h"
34
35
36 @// Import symbols required from other files
37 @// (For example tables)
38
39
40
41
42 @// Set debugging level
43 @//DEBUG_ON    SETL {TRUE}
44
45
46
47
48 @// Guarding implementation by the processor name
49
50
51 @//Input Registers
52 @// Core-register names for the stage arguments; FFTSTAGE reads all five and writes all five back
53 #define pSrc                            r0
54 #define pDst                            r2
55 #define pTwiddle                        r1
56 #define subFFTNum                       r6
57 #define subFFTSize                      r7
58
59
60 @//Output Registers
61
62
63 @//Local Scratch Registers
64 @// grpCount and pTmp are both r4: FFTSTAGE counts the loop in grpCount and uses pTmp only after the loop
65 #define outPointStep                    r3
66 #define grpCount                        r4
67 #define dstStep                         r5
68 #define twStep                          r8
69 #define pTmp                            r4
70
71 @// Neon Registers
72 @// dW1S32/dW1 are both D0 and dW2S32/dW2 both D1: the same register named with 32-bit and 16-bit lanes
73 #define dW1S32                          D0.S32
74 #define dW2S32                          D1.S32
75 #define dW1                             D0.S16
76 #define dW2                             D1.S16
77 @// dX0-dX3 receive the loaded points, dY0-dY3 the butterfly results, qT0/qT1 the 32-bit products
78 #define dX0                             D2.S16
79 #define dX1                             D3.S16
80 #define dX2                             D4.S16
81 #define dX3                             D5.S16
82 #define dY0                             D6.S16
83 #define dY1                             D7.S16
84 #define dY2                             D8.S16
85 #define dY3                             D9.S16
86 #define qT0                             Q5.S32
87 #define qT1                             Q6.S32
88 @// NOTE(review): D8, D9, Q5 and Q6 are callee-saved (d8-d15) under the AAPCS and nothing visible in
89 @// this file saves them (M_START is defined elsewhere) - presumably the caller does; confirm.
90         .macro FFTSTAGE scaled, inverse, name
91         @// FFTSTAGE: body of one radix-2 FFT stage on interleaved 16-bit complex data.
92         @//   scaled  : "TRUE" selects the halving butterflies (VHADD/VHSUB), anything else VADD/VSUB
93         @//   inverse : "TRUE" selects the VMLAL/VMLSL pairing used by the FFTInv entry points
94         @//   name    : suffix appended to the grpLoop label so every expansion gets a unique label
95         @// Update grpCount and grpSize right away in order to reuse pGrpCount and pGrpSize regs
96         @// (no stack arguments are defined; grpCount = 2 * subFFTSize)
97
98         LSL     grpCount,subFFTSize,#1
99
100
101         @// update subFFTSize for the next stage (it takes the doubled value held in grpCount)
102         MOV     subFFTSize,grpCount
103
104         @// pOut0+1 increments pOut0 by 8 bytes
105         @// pOut0+outPointStep == increment of 4*outPointStep bytes = 2*size bytes
106         SMULBB  outPointStep,grpCount,subFFTNum             @// product of the low 16-bit halves only
107         MOV     twStep,subFFTNum,LSL #1                     @// twStep = 2 * subFFTNum, twiddle post-increment
108         LSR     subFFTNum,subFFTNum,#1                      @//grpSize
109
110         @// dstStep = 8 - outPointStep: steps pDst back to 8 bytes past the first store of a pair
111         RSB      dstStep,outPointStep,#8
112
113
114         @// Note: pointStep is 8 in this case, so presumably no extra reg is needed
115         @// Loop on the groups: 2 groups at a time
116
117 grpLoop\name:
118         @// Each VLD1 loads one 32-bit twiddle, copies it to both lanes and adds twStep to pTwiddle
119         VLD1     dW1S32[],[pTwiddle],twStep                @//[wi | wr]
120         VLD1     dW2S32[],[pTwiddle],twStep
121
122         @// Process the sets for each grp:  2 sets at a time (no set looping required)
123
124         VLD1    dX0,[pSrc]!            @// point0: of set0,set1 of grp0
125         VLD1    dX1,[pSrc]!            @// point1: of set0,set1 of grp0
126         VLD1    dX2,[pSrc]!            @// point0: of set0,set1 of grp1
127         VLD1    dX3,[pSrc]!            @// point1: of set0,set1 of grp1
128         @// SUBS sets the flags tested by the BGT at the end of the loop body
129         SUBS    grpCount,grpCount,#4              @// decrement the loop counter
130         VUZP    dW1,dW2
131         VUZP    dX1,dX3
132         @// After the VUZPs dW1/dX1 hold the even 16-bit lanes (real, going by the [wi | wr] note) and dW2/dX3 the odd ones
133         .ifeqs  "\inverse", "TRUE"
134             VMULL   qT0,dX1,dW1
135             VMLAL   qT0,dX3,dW2                       @// real part
136             VMULL   qT1,dX3,dW1
137             VMLSL   qT1,dX1,dW2                       @// imag part
138             @// i.e. lane-wise qT0 = dX1*dW1 + dX3*dW2, qT1 = dX3*dW1 - dX1*dW2 (32-bit results)
139         .else
140             VMULL   qT0,dX1,dW1
141             VMLSL   qT0,dX3,dW2                       @// real part
142             VMULL   qT1,dX3,dW1
143             VMLAL   qT1,dX1,dW2                       @// imag part
144             @// i.e. lane-wise qT0 = dX1*dW1 - dX3*dW2, qT1 = dX3*dW1 + dX1*dW2 (32-bit results)
145         .endif
146         @// Narrow the 32-bit results back to 16 bits with a rounding right shift by 15
147         VRSHRN  dX1,qT0,#15
148         VRSHRN  dX3,qT1,#15
149         @// Re-interleave so dX1/dX3 are laid out like dX0/dX2 again
150         VZIP    dX1,dX3
151
152
153         .ifeqs "\scaled", "TRUE"
154             @// halving subtract/add: each 16-bit result is the difference/sum shifted right by one
155             VHSUB    dY0,dX0,dX1
156             VHADD    dY1,dX0,dX1
157             VHSUB    dY2,dX2,dX3
158             VHADD    dY3,dX2,dX3
159
160         .else
161             @// plain subtract/add, no halving
162             VSUB    dY0,dX0,dX1
163             VADD    dY1,dX0,dX1
164             VSUB    dY2,dX2,dX3
165             VADD    dY3,dX2,dX3
166
167
168
169         .endif
170         @// Each pair: store at pDst, store again outPointStep bytes further on, end up 8 bytes past the first
171         VST1    dY0,[pDst],outPointStep             @// point0: of set0,set1 of grp0
172         VST1    dY1,[pDst],dstStep                  @// dstStep = -outPointStep + 8
173         VST1    dY2,[pDst],outPointStep             @// point0: of set0,set1 of grp1
174         VST1    dY3,[pDst],dstStep                  @// point1: of set0,set1 of grp1
175
176         @// Loop again while the count left by the SUBS above is still positive
177         BGT     grpLoop\name
178
179         @// pTmp is the same register (r4) as grpCount, which is not read again once the loop exits
180         @// Reset and Swap pSrc and pDst for the next stage
181         MOV     pTmp,pDst
182         SUB     pDst,pSrc,outPointStep,LSL #1       @// new pDst = advanced pSrc - 2*outPointStep
183         SUB     pSrc,pTmp,outPointStep              @// new pSrc = advanced pDst - outPointStep
184
185         @// Reset pTwiddle for the next stage
186         SUB     pTwiddle,pTwiddle,outPointStep      @// pTwiddle -= 2*size bytes
187
188         .endm
189
190         @// Forward stage without scaling: FFTSTAGE with scaled="FALSE", inverse="FALSE", label suffix FWD.
191         @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); r4 is the M_START argument.
192         M_START armSP_FFTFwd_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
193         FFTSTAGE "FALSE","FALSE",FWD
194         M_END
195
196         @// Inverse stage without scaling: FFTSTAGE with scaled="FALSE", inverse="TRUE", label suffix INV.
197         @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); r4 is the M_START argument.
198         M_START armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe,r4
199         FFTSTAGE "FALSE","TRUE",INV
200         M_END
201
202         @// Forward stage with scaling: FFTSTAGE with scaled="TRUE", inverse="FALSE", label suffix FWDSFS.
203         @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); r4 is the M_START argument.
204         M_START armSP_FFTFwd_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
205         FFTSTAGE "TRUE","FALSE",FWDSFS
206         M_END
207
208         @// Inverse stage with scaling: FFTSTAGE with scaled="TRUE", inverse="TRUE", label suffix INVSFS.
209         @// M_START/M_END are not defined in this file (presumably armCOMM_s.h); r4 is the M_START argument.
210         M_START armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe,r4
211         FFTSTAGE "TRUE","TRUE",INVSFS
212         M_END
213
214
215
216     .end