Upstream version 10.38.222.0
[platform/framework/web/crosswalk.git] / src / third_party / libvpx / source / libvpx / vpx_scale / arm / neon / vp8_vpxyv12_copysrcframe_func_neon.asm
1 ;
2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ;  Use of this source code is governed by a BSD-style license
5 ;  that can be found in the LICENSE file in the root of the source
6 ;  tree. An additional intellectual property rights grant can be found
7 ;  in the file PATENTS.  All contributing project authors may
8 ;  be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12     EXPORT  |vp8_yv12_copy_src_frame_func_neon|    ; make the routine visible to other object files
13     ARM                                            ; assemble what follows as ARM (not Thumb) instructions
14     REQUIRE8                                       ; this code requires 8-byte stack alignment
15     PRESERVE8                                      ; this code declares that it preserves 8-byte stack alignment
16
17     INCLUDE vpx_scale_asm_offsets.asm              ; presumably defines the yv12_buffer_config_* offsets used below
18
19     AREA ||.text||, CODE, READONLY, ALIGN=2        ; read-only code section, aligned to 2^2 = 4 bytes
20 ;Note: This function copies the source data in src_buffer[i] at the start
21 ;of encoding. The buffer's width and height are cpi->oxcf.Width and
22 ;cpi->oxcf.Height, which can be ANY numbers (NOT always multiples of 16 or 4).
23
24 ;void vp8_yv12_copy_src_frame_func_neon(const YV12_BUFFER_CONFIG *src_ybc,
25 ;                                       YV12_BUFFER_CONFIG *dst_ybc);
26
27 ;-----------------------------------------------------------------------
28 ; Copies the Y plane and then the U and V planes from src_ybc to dst_ybc.
29 ; Each plane is copied with its own width, height and stride fields. Rows
30 ; are copied two at a time; an odd final row gets a single-row pass.
31 ;
32 ; In:    r0 = src_ybc, r1 = dst_ybc
33 ; Regs:  r2,  r3  = src/dst pointer into the current row
34 ;        r10, r11 = src/dst pointer into the row below it
35 ;        r4       = plane height
36 ;        r5       = plane width
37 ;        r6,  r7  = src/dst (2 * stride - width): step from the end of a
38 ;                   copied row to the start of the row two lines down
39 ;        r8       = byte scratch for the 1-byte tail loops
40 ;        r9       = chroma planes still to copy
41 ;        r12      = bytes still to copy in the current row
42 ;        lr       = row pairs still to copy
43 ; Saves: r4-r11, lr and d8-d15 (q4-q7 are used as copy registers)
44 ;
45 ; Each chunked loop is entered only when at least one full chunk is left,
46 ; so a width smaller than a chunk or a height below two rows is copied
47 ; exactly instead of running past the end of the row or plane.
48 ;-----------------------------------------------------------------------
49 |vp8_yv12_copy_src_frame_func_neon| PROC
50     push            {r4 - r11, lr}
51     vpush           {d8 - d15}
52
53     ;Copy Y plane
54     ldr             r4, [r0, #yv12_buffer_config_y_height]
55     ldr             r5, [r0, #yv12_buffer_config_y_width]
56     ldr             r6, [r0, #yv12_buffer_config_y_stride]
57     ldr             r7, [r1, #yv12_buffer_config_y_stride]
58     ldr             r2, [r0, #yv12_buffer_config_y_buffer]       ;srcptr1
59     ldr             r3, [r1, #yv12_buffer_config_y_buffer]       ;dstptr1
60
61     add             r10, r2, r6             ;second row src
62     add             r11, r3, r7             ;second row dst
63     mov             r6, r6, lsl #1
64     mov             r7, r7, lsl #1
65     sub             r6, r6, r5              ;adjust stride
66     sub             r7, r7, r5
67
68     ; copy two rows at one time
69     movs            lr, r4, lsr #1          ;lr = number of row pairs
70     beq             cp_y_last_row           ;height < 2: no full pair
71
72 cp_src_to_dst_height_loop
73     mov             r12, r5
74     cmp             r12, #128
75     blo             cp_width_8_check        ;row shorter than 128 bytes
76
77 cp_width_128_loop
78     vld1.8          {q0, q1}, [r2]!
79     vld1.8          {q4, q5}, [r10]!
80     vld1.8          {q2, q3}, [r2]!
81     vld1.8          {q6, q7}, [r10]!
82     vld1.8          {q8, q9}, [r2]!
83     vld1.8          {q12, q13}, [r10]!
84     vld1.8          {q10, q11}, [r2]!
85     vld1.8          {q14, q15}, [r10]!
86     sub             r12, r12, #128
87     cmp             r12, #128               ;another full chunk left?
88     vst1.8          {q0, q1}, [r3]!
89     vst1.8          {q4, q5}, [r11]!
90     vst1.8          {q2, q3}, [r3]!
91     vst1.8          {q6, q7}, [r11]!
92     vst1.8          {q8, q9}, [r3]!
93     vst1.8          {q12, q13}, [r11]!
94     vst1.8          {q10, q11}, [r3]!
95     vst1.8          {q14, q15}, [r11]!
96     bhs             cp_width_128_loop
97
98 cp_width_8_check
99     cmp             r12, #8
100     blo             cp_width_1_check        ;fewer than 8 bytes left
101
102 cp_width_8_loop
103     vld1.8          {d0}, [r2]!
104     vld1.8          {d1}, [r10]!
105     sub             r12, r12, #8
106     cmp             r12, #8
107     vst1.8          {d0}, [r3]!
108     vst1.8          {d1}, [r11]!
109     bhs             cp_width_8_loop
110
111 cp_width_1_check
112     cmp             r12, #0
113     beq             cp_width_done
114
115 cp_width_1_loop
116     ldrb            r8, [r2], #1
117     subs            r12, r12, #1
118     strb            r8, [r3], #1
119     ldrb            r8, [r10], #1
120     strb            r8, [r11], #1
121     bne             cp_width_1_loop
122
123 cp_width_done
124     subs            lr, lr, #1
125     add             r2, r2, r6
126     add             r3, r3, r7
127     add             r10, r10, r6
128     add             r11, r11, r7
129     bne             cp_src_to_dst_height_loop
130
131 ;copy last line for Y if y_height is odd
132 cp_y_last_row
133     tst             r4, #1
134     beq             cp_width_done_1
135     mov             r12, r5
136     cmp             r12, #128
137     blo             cp_width_8_check_1      ;row shorter than 128 bytes
138
139 cp_width_128_loop_1
140     vld1.8          {q0, q1}, [r2]!
141     vld1.8          {q2, q3}, [r2]!
142     vld1.8          {q8, q9}, [r2]!
143     vld1.8          {q10, q11}, [r2]!
144     sub             r12, r12, #128
145     cmp             r12, #128
146     vst1.8          {q0, q1}, [r3]!
147     vst1.8          {q2, q3}, [r3]!
148     vst1.8          {q8, q9}, [r3]!
149     vst1.8          {q10, q11}, [r3]!
150     bhs             cp_width_128_loop_1
151
152 cp_width_8_check_1
153     cmp             r12, #8
154     blo             cp_width_1_check_1      ;fewer than 8 bytes left
155
156 cp_width_8_loop_1
157     vld1.8          {d0}, [r2]!
158     sub             r12, r12, #8
159     cmp             r12, #8
160     vst1.8          {d0}, [r3]!
161     bhs             cp_width_8_loop_1
162
163 cp_width_1_check_1
164     cmp             r12, #0
165     beq             cp_width_done_1
166
167 cp_width_1_loop_1
168     ldrb            r8, [r2], #1
169     subs            r12, r12, #1
170     strb            r8, [r3], #1
171     bne             cp_width_1_loop_1
172 cp_width_done_1
173
174 ;Copy U & V planes
175     ldr             r4, [r0, #yv12_buffer_config_uv_height]
176     ldr             r5, [r0, #yv12_buffer_config_uv_width]
177     ldr             r6, [r0, #yv12_buffer_config_uv_stride]
178     ldr             r7, [r1, #yv12_buffer_config_uv_stride]
179     ldr             r2, [r0, #yv12_buffer_config_u_buffer]       ;srcptr1
180     ldr             r3, [r1, #yv12_buffer_config_u_buffer]       ;dstptr1
181
182     add             r10, r2, r6             ;second row src
183     add             r11, r3, r7             ;second row dst
184     mov             r6, r6, lsl #1
185     mov             r7, r7, lsl #1
186     sub             r6, r6, r5              ;adjust stride
187     sub             r7, r7, r5
188
189     mov             r9, #2                  ;two chroma planes: U, then V
190
191 cp_uv_loop
192     ;copy two rows at one time
193     movs            lr, r4, lsr #1          ;lr = number of row pairs
194     beq             cp_uv_last_row          ;height < 2: no full pair
195
196 cp_src_to_dst_height_uv_loop
197     mov             r12, r5
198     cmp             r12, #64
199     blo             cp_width_uv_8_check     ;row shorter than 64 bytes
200
201 cp_width_uv_64_loop
202     vld1.8          {q0, q1}, [r2]!
203     vld1.8          {q4, q5}, [r10]!
204     vld1.8          {q2, q3}, [r2]!
205     vld1.8          {q6, q7}, [r10]!
206     sub             r12, r12, #64
207     cmp             r12, #64                ;another full chunk left?
208     vst1.8          {q0, q1}, [r3]!
209     vst1.8          {q4, q5}, [r11]!
210     vst1.8          {q2, q3}, [r3]!
211     vst1.8          {q6, q7}, [r11]!
212     bhs             cp_width_uv_64_loop
213
214 cp_width_uv_8_check
215     cmp             r12, #8
216     blo             cp_width_uv_1_check     ;fewer than 8 bytes left
217
218 cp_width_uv_8_loop
219     vld1.8          {d0}, [r2]!
220     vld1.8          {d1}, [r10]!
221     sub             r12, r12, #8
222     cmp             r12, #8
223     vst1.8          {d0}, [r3]!
224     vst1.8          {d1}, [r11]!
225     bhs             cp_width_uv_8_loop
226
227 cp_width_uv_1_check
228     cmp             r12, #0
229     beq             cp_width_uv_done
230
231 cp_width_uv_1_loop
232     ldrb            r8, [r2], #1
233     subs            r12, r12, #1
234     strb            r8, [r3], #1
235     ldrb            r8, [r10], #1
236     strb            r8, [r11], #1
237     bne             cp_width_uv_1_loop
238
239 cp_width_uv_done
240     subs            lr, lr, #1
241     add             r2, r2, r6
242     add             r3, r3, r7
243     add             r10, r10, r6
244     add             r11, r11, r7
245     bne             cp_src_to_dst_height_uv_loop
246
247 ;copy last line for U & V if uv_height is odd
248 cp_uv_last_row
249     tst             r4, #1
250     beq             cp_width_uv_done_1
251     mov             r12, r5
252     cmp             r12, #64
253     blo             cp_width_uv_8_check_1   ;row shorter than 64 bytes
254
255 cp_width_uv_64_loop_1
256     vld1.8          {q0, q1}, [r2]!
257     vld1.8          {q2, q3}, [r2]!
258     sub             r12, r12, #64
259     cmp             r12, #64
260     vst1.8          {q0, q1}, [r3]!
261     vst1.8          {q2, q3}, [r3]!
262     bhs             cp_width_uv_64_loop_1
263
264 cp_width_uv_8_check_1
265     cmp             r12, #8
266     blo             cp_width_uv_1_check_1   ;fewer than 8 bytes left
267
268 cp_width_uv_8_loop_1
269     vld1.8          {d0}, [r2]!
270     sub             r12, r12, #8
271     cmp             r12, #8
272     vst1.8          {d0}, [r3]!
273     bhs             cp_width_uv_8_loop_1
274
275 cp_width_uv_1_check_1
276     cmp             r12, #0
277     beq             cp_width_uv_done_1
278
279 cp_width_uv_1_loop_1
280     ldrb            r8, [r2], #1
281     subs            r12, r12, #1
282     strb            r8, [r3], #1
283     bne             cp_width_uv_1_loop_1
284 cp_width_uv_done_1
285
286     subs            r9, r9, #1              ;flags from here drive every ..ne below
287     ldrne           r2, [r0, #yv12_buffer_config_v_buffer]      ;srcptr1
288     ldrne           r3, [r1, #yv12_buffer_config_v_buffer]      ;dstptr1
289     ldrne           r10, [r0, #yv12_buffer_config_uv_stride]
290     ldrne           r11, [r1, #yv12_buffer_config_uv_stride]
291
292     addne           r10, r2, r10                ;second row src
293     addne           r11, r3, r11                ;second row dst
294
295     bne             cp_uv_loop              ;r4-r7 are reused as-is for the V plane
296
297     vpop            {d8 - d15}
298     pop             {r4 - r11, pc}
299
300     ENDP
301     END