2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8_subtract_mby_armv6|
13 EXPORT |vp8_subtract_mbuv_armv6|
14 EXPORT |vp8_subtract_b_armv6|
16 INCLUDE vp9_asm_enc_offsets.asm
22 AREA ||.text||, CODE, READONLY, ALIGN=2
27 |vp8_subtract_b_armv6| PROC
; Subtracts predictor bytes from source bytes, four pixels per row, and
; stores the results as 16-bit differences.
;
; Register roles as used by the visible instructions:
;   r0  in: base address read at the vp8_block_* offsets; scratch afterwards
;   r1  in: base address read at vp8_blockd_predictor; scratch afterwards
;   r2  in: byte step applied to the pred pointer after each row; the diff
;           pointer advances by twice this value (16-bit entries)
;   r3  src pointer, post-incremented by r7 after each row
;   r6  diff (output) pointer
;   r7  src stride
;   r8  pred pointer
;   r9  row counter, initialised to 4
;
; The vp8_block_* / vp8_blockd_* offset symbols are not defined in this
; file; presumably they come from the INCLUDEd offsets file -- confirm.
;
; NOTE(review): r4-r9 are written here but no save/restore of them is
; visible in this block, and neither a loop label nor a branch back to one
; is visible after the final add. Confirm against the complete file.
31 ldr r4, [r0, #vp8_block_base_src] ; r4 = base_src field
32 ldr r5, [r0, #vp8_block_src] ; r5 = src field (added to r3 below)
33 ldr r6, [r0, #vp8_block_src_diff] ; r6 = diff output pointer
36 ldr r7, [r0, #vp8_block_src_stride] ; r7 = per-row step for the src pointer
; NOTE(review): r3 is never loaded in the visible lines, and r4 (base_src)
; is overwritten below without being read. The trailing comment on the next
; line implies r3 should hold *base_src -- verify a load through r4 exists.
37 add r3, r3, r5 ; src = *base_src + src
38 ldr r8, [r1, #vp8_blockd_predictor] ; r8 = pred pointer
40 mov r9, #4 ; loop count
; ---- per-row work: 4 source bytes and 4 predictor bytes -> 4 halfwords ----
; r0/r1 are reused as data registers from here on; the incoming argument
; values are no longer needed.
44 ldr r0, [r3], r7 ; src
45 ldr r1, [r8], r2 ; pred
; Zero-extend the even bytes (0,2) and the odd bytes (1,3) into separate
; 16-bit lanes so one usub16 subtracts two pixels at a time.
47 uxtb16 r4, r0 ; [s2 | s0]
48 uxtb16 r5, r1 ; [p2 | p0]
49 uxtb16 r0, r0, ror #8 ; [s3 | s1]
50 uxtb16 r1, r1, ror #8 ; [p3 | p1]
; Each 16-bit lane receives s - p modulo 2^16; with 8-bit inputs that is
; the two's-complement difference.
52 usub16 r4, r4, r5 ; [d2 | d0]
53 usub16 r5, r0, r1 ; [d3 | d1]
; The flags set here are not modified by the pkhbt/pkhtb/str/add that follow,
; so they are still valid for a conditional branch placed after them.
55 subs r9, r9, #1 ; decrement loop counter
; Re-interleave the even/odd lanes back into pixel order d0,d1,d2,d3.
57 pkhbt r0, r4, r5, lsl #16 ; [d1 | d0]
58 pkhtb r1, r5, r4, asr #16 ; [d3 | d2]
60 str r0, [r6, #0] ; diff
61 str r1, [r6, #4] ; diff
; r2 counts entries per row; each diff entry is 2 bytes, hence lsl #1.
63 add r6, r6, r2, lsl #1 ; update diff pointer
73 ; r1 unsigned char *usrc
74 ; r2 unsigned char *vsrc
75 ; r3 unsigned char *pred
; Also used by the code below:
;   r0      destination (diff) pointer; results are stored as 16-bit values
;   [sp]    first stack argument: step added to usrc/vsrc after each row
;
; Two passes with the same structure: the first reads from usrc (r1), the
; second from vsrc (r2). r0 (diff) and r3 (pred) are not reset between the
; passes, so the second pass continues directly after the first pass's
; output and predictor data.
;
; Scratch: r4 = row counter, r5 = stride, r6-r11 = pixel data. r12 is saved
; and restored but not used by the visible instructions.
;
; NOTE(review): no loop labels or conditional branches are visible after
; either "subs r4, r4, #1" -- confirm against the complete file.
77 |vp8_subtract_mbuv_armv6| PROC
79 stmfd sp!, {r4-r12, lr} ; pushes 10 words (40 bytes)
; 512 bytes = 256 16-bit diff entries; presumably this skips the luma part
; of the diff buffer (likewise 256 predictor bytes) -- TODO confirm layout.
81 add r0, r0, #512 ; set *diff point to Cb
82 add r3, r3, #256 ; set *pred point to Cb
84 mov r4, #8 ; loop count
85 ldr r5, [sp, #40] ; stride (first stack word above the 40 bytes pushed)
; ---- first pass (usrc): one row = 8 pixels, handled as groups A and B ----
89 ldr r6, [r1] ; src (A)
90 ldr r7, [r3], #4 ; pred (A)
; Even bytes and odd bytes are zero-extended into separate 16-bit lanes so
; each usub16 subtracts two pixels at once.
92 uxtb16 r8, r6 ; [s2 | s0] (A)
93 uxtb16 r9, r7 ; [p2 | p0] (A)
94 uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
95 uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
97 usub16 r6, r8, r9 ; [d2 | d0] (A)
98 usub16 r7, r10, r11 ; [d3 | d1] (A)
; Group B loads are issued before group A's results are packed and stored.
100 ldr r10, [r1, #4] ; src (B)
101 ldr r11, [r3], #4 ; pred (B)
; Re-interleave lanes into pixel order d0,d1 / d2,d3.
103 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
104 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
; r8 is stored and then immediately reused for group B's even source bytes.
106 str r8, [r0], #4 ; diff (A)
107 uxtb16 r8, r10 ; [s2 | s0] (B)
108 str r9, [r0], #4 ; diff (A)
110 uxtb16 r9, r11 ; [p2 | p0] (B)
111 uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
112 uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
114 usub16 r6, r8, r9 ; [d2 | d0] (B)
115 usub16 r7, r10, r11 ; [d3 | d1] (B)
; Source advances by the caller's stride; pred has already advanced by
; 8 bytes through the two post-indexed loads above.
117 add r1, r1, r5 ; update usrc pointer
119 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
120 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
122 str r8, [r0], #4 ; diff (B)
; The final str below does not modify the flags set by this subs.
123 subs r4, r4, #1 ; update loop counter
124 str r9, [r0], #4 ; diff (B)
; ---- second pass (vsrc): same sequence, reading from r2 instead of r1 ----
128 mov r4, #8 ; loop count
132 ldr r6, [r2] ; src (A)
133 ldr r7, [r3], #4 ; pred (A)
135 uxtb16 r8, r6 ; [s2 | s0] (A)
136 uxtb16 r9, r7 ; [p2 | p0] (A)
137 uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
138 uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
140 usub16 r6, r8, r9 ; [d2 | d0] (A)
141 usub16 r7, r10, r11 ; [d3 | d1] (A)
143 ldr r10, [r2, #4] ; src (B)
144 ldr r11, [r3], #4 ; pred (B)
146 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
147 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
149 str r8, [r0], #4 ; diff (A)
150 uxtb16 r8, r10 ; [s2 | s0] (B)
151 str r9, [r0], #4 ; diff (A)
153 uxtb16 r9, r11 ; [p2 | p0] (B)
154 uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
155 uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
157 usub16 r6, r8, r9 ; [d2 | d0] (B)
158 usub16 r7, r10, r11 ; [d3 | d1] (B)
160 add r2, r2, r5 ; update vsrc pointer
162 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
163 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
165 str r8, [r0], #4 ; diff (B)
166 subs r4, r4, #1 ; update loop counter
167 str r9, [r0], #4 ; diff (B)
; Restore the saved registers; loading the saved lr value into pc returns.
171 ldmfd sp!, {r4-r12, pc}
177 ; r1 unsigned char *src
178 ; r2 unsigned char *pred
180 |vp8_subtract_mby_armv6| PROC
186 ldr r6, [r1] ; src (A)
187 ldr r7, [r2], #4 ; pred (A)
189 uxtb16 r8, r6 ; [s2 | s0] (A)
190 uxtb16 r9, r7 ; [p2 | p0] (A)
191 uxtb16 r10, r6, ror #8 ; [s3 | s1] (A)
192 uxtb16 r11, r7, ror #8 ; [p3 | p1] (A)
194 usub16 r6, r8, r9 ; [d2 | d0] (A)
195 usub16 r7, r10, r11 ; [d3 | d1] (A)
197 ldr r10, [r1, #4] ; src (B)
198 ldr r11, [r2], #4 ; pred (B)
200 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (A)
201 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (A)
203 str r8, [r0], #4 ; diff (A)
204 uxtb16 r8, r10 ; [s2 | s0] (B)
205 str r9, [r0], #4 ; diff (A)
207 uxtb16 r9, r11 ; [p2 | p0] (B)
208 uxtb16 r10, r10, ror #8 ; [s3 | s1] (B)
209 uxtb16 r11, r11, ror #8 ; [p3 | p1] (B)
211 usub16 r6, r8, r9 ; [d2 | d0] (B)
212 usub16 r7, r10, r11 ; [d3 | d1] (B)
214 ldr r10, [r1, #8] ; src (C)
215 ldr r11, [r2], #4 ; pred (C)
217 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (B)
218 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (B)
220 str r8, [r0], #4 ; diff (B)
221 uxtb16 r8, r10 ; [s2 | s0] (C)
222 str r9, [r0], #4 ; diff (B)
224 uxtb16 r9, r11 ; [p2 | p0] (C)
225 uxtb16 r10, r10, ror #8 ; [s3 | s1] (C)
226 uxtb16 r11, r11, ror #8 ; [p3 | p1] (C)
228 usub16 r6, r8, r9 ; [d2 | d0] (C)
229 usub16 r7, r10, r11 ; [d3 | d1] (C)
231 ldr r10, [r1, #12] ; src (D)
232 ldr r11, [r2], #4 ; pred (D)
234 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (C)
235 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (C)
237 str r8, [r0], #4 ; diff (C)
238 uxtb16 r8, r10 ; [s2 | s0] (D)
239 str r9, [r0], #4 ; diff (C)
241 uxtb16 r9, r11 ; [p2 | p0] (D)
242 uxtb16 r10, r10, ror #8 ; [s3 | s1] (D)
243 uxtb16 r11, r11, ror #8 ; [p3 | p1] (D)
245 usub16 r6, r8, r9 ; [d2 | d0] (D)
246 usub16 r7, r10, r11 ; [d3 | d1] (D)
248 add r1, r1, r3 ; update src pointer
250 pkhbt r8, r6, r7, lsl #16 ; [d1 | d0] (D)
251 pkhtb r9, r7, r6, asr #16 ; [d3 | d2] (D)
253 str r8, [r0], #4 ; diff (D)
254 subs r4, r4, #1 ; update loop counter
255 str r9, [r0], #4 ; diff (D)