2 ; Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license and patent
5 ; grant that can be found in the LICENSE file in the root of the source
6 ; tree. All contributing project authors may be found in the AUTHORS
7 ; file in the root of the source tree.
10 EXPORT |vp8_short_walsh4x4_armv6|
16 AREA |.text|, CODE, READONLY ; name this block of code
;-----------------------------------------------------------------------
; 4x4 Walsh-Hadamard forward transform, ARMv6 packed-halfword version.
;
; In:    r0 = input   first row of 4 signed 16-bit values
;        r1 = output  receives 16 consecutive signed 16-bit results
;        r2 = pitch   byte distance between input rows (added directly
;                     to the row address)
; Out:   no result is deliberately placed in r0; on return it holds the
;        address of the last input row.
; Saves: r4-r11 (pushed/popped together with lr)
; Clobb: r0-r3, r12, flags
; NOTE(review): all memory accesses are 32-bit ldr/str, so input rows
;        and output are presumably expected to be word aligned - confirm
;        against callers.
;
; Lane notation in the comments below: [hi | lo] names the coefficient
; index held in the top / bottom halfword of a register.
;-----------------------------------------------------------------------
;short vp8_short_walsh4x4_armv6(short *input, short *output, int pitch)
|vp8_short_walsh4x4_armv6| PROC

    stmdb       sp!, {r4 - r11, lr}

    ; r2 is reused as a data register below, so keep the pitch in r12.
    mov         r12, r2
    ldr         r2, [r0]             ; [1  |  0]
    ldr         r3, [r0, #4]         ; [3  |  2]
    ldr         r4, [r0, r12]!       ; [5  |  4]   r0 advances one row
    ldr         r5, [r0, #4]         ; [7  |  6]
    ldr         r6, [r0, r12]!       ; [9  |  8]
    ldr         r7, [r0, #4]         ; [11 | 10]
    ldr         r8, [r0, r12]!       ; [13 | 12]   last use of pitch (r12)
    ldr         r9, [r0, #4]         ; [15 | 14]

    ; ---- first pass: horizontal butterflies, two rows at a time ----
    qsubaddx    r10, r2, r3          ; [c1|a1] [1-2   |   0+3]
    qaddsubx    r11, r2, r3          ; [b1|d1] [1+2   |   0-3]
    qsubaddx    r12, r4, r5          ; [c1|a1] [5-6   |   4+7]
    qaddsubx    lr, r4, r5           ; [b1|d1] [5+6   |   4-7]

    qaddsubx    r2, r10, r11         ; [1 | 2] [c1+d1 | a1-b1]
    qaddsubx    r3, r11, r10         ; [0 | 3] [b1+a1 | d1-c1]
    qaddsubx    r4, r12, lr          ; [5 | 6] [c1+d1 | a1-b1]
    qaddsubx    r5, lr, r12          ; [4 | 7] [b1+a1 | d1-c1]

    qsubaddx    r10, r6, r7          ; [c1|a1] [9-10  |  8+11]
    qaddsubx    r11, r6, r7          ; [b1|d1] [9+10  |  8-11]
    qsubaddx    r12, r8, r9          ; [c1|a1] [13-14 | 12+15]
    qaddsubx    lr, r8, r9           ; [b1|d1] [13+14 | 12-15]

    qaddsubx    r6, r10, r11         ; [9 |10] [c1+d1 | a1-b1]
    qaddsubx    r7, r11, r10         ; [8 |11] [b1+a1 | d1-c1]
    qaddsubx    r8, r12, lr          ; [13|14] [c1+d1 | a1-b1]
    qaddsubx    r9, lr, r12          ; [12|15] [b1+a1 | d1-c1]

    ; first transform complete

    ; ---- second pass: vertical butterflies, columns 0 and 3 ----
    qadd16      r10, r3, r9          ; a1 [0+12  |  3+15]
    qadd16      r11, r5, r7          ; b1 [4+8   |  7+11]
    qsub16      r12, r5, r7          ; c1 [4-8   |  7-11]
    qsub16      lr, r3, r9           ; d1 [0-12  |  3-15]

    qadd16      r3, r10, r11         ; a2 [a1+b1] [0  |  3]
    qadd16      r5, r12, lr          ; b2 [c1+d1] [4  |  7]
    qsub16      r7, r10, r11         ; c2 [a1-b1] [8  | 11]
    qsub16      r9, lr, r12          ; d2 [d1-c1] [12 | 15]

    ; ---- second pass: columns 1 and 2 ----
    qadd16      r10, r2, r8          ; a1 [1+13  |  2+14]
    qadd16      r11, r4, r6          ; b1 [5+9   |  6+10]
    qsub16      r12, r4, r6          ; c1 [5-9   |  6-10]
    qsub16      lr, r2, r8           ; d1 [1-13  |  2-14]

    qadd16      r2, r10, r11         ; a2 [a1+b1] [1  |  2]
    qadd16      r4, r12, lr          ; b2 [c1+d1] [5  |  6]
    qsub16      r6, r10, r11         ; c2 [a1-b1] [9  | 10]
    qsub16      r8, lr, r12          ; d2 [d1-c1] [13 | 14]

    ; ---- rounding and store: out = ([a-d]2 + ([a-d]2 > 0)) >> 1 ----
    ; Each coefficient is isolated with a flag-setting shift so that the
    ; following addpl sees its sign.  PL also fires for zero, which is
    ; harmless because (0 + 1) >> 1 == 0.
    ;   top halfword    : asrs #16 sign-extends it, the increment is 1
    ;   bottom halfword : lsls #16 lifts it to the top, the increment
    ;                     is 0x10000
    ; The final >> 1 is folded into the lsl #15 / asr #1 / pkhtb shifts.

    asrs        r10, r3, #16
    addpl       r10, r10, #1         ; [~0]
    asrs        r11, r2, #16
    addpl       r11, r11, #1         ; [~1]
    lsl         r11, r11, #15        ; [1  |  x]
    pkhtb       r10, r11, r10, asr #1; [1  |  0]
    str         r10, [r1], #4

    lsls        r11, r2, #16
    addpl       r11, r11, #0x10000   ; [~2]
    lsls        r12, r3, #16
    addpl       r12, r12, #0x10000   ; [~3]
    asr         r12, r12, #1         ; [3  |  x]
    pkhtb       r11, r12, r11, asr #17; [3  |  2]
    str         r11, [r1], #4

    asrs        r2, r5, #16          ; r2/r3 are free from here on
    addpl       r2, r2, #1           ; [~4]
    asrs        r3, r4, #16
    addpl       r3, r3, #1           ; [~5]
    lsl         r3, r3, #15          ; [5  |  x]
    pkhtb       r2, r3, r2, asr #1   ; [5  |  4]
    str         r2, [r1], #4

    lsls        r2, r4, #16
    addpl       r2, r2, #0x10000     ; [~6]
    lsls        r3, r5, #16
    addpl       r3, r3, #0x10000     ; [~7]
    asr         r3, r3, #1           ; [7  |  x]
    pkhtb       r2, r3, r2, asr #17  ; [7  |  6]
    str         r2, [r1], #4

    asrs        r2, r7, #16
    addpl       r2, r2, #1           ; [~8]
    asrs        r3, r6, #16
    addpl       r3, r3, #1           ; [~9]
    lsl         r3, r3, #15          ; [9  |  x]
    pkhtb       r2, r3, r2, asr #1   ; [9  |  8]
    str         r2, [r1], #4

    lsls        r2, r6, #16
    addpl       r2, r2, #0x10000     ; [~10]
    lsls        r3, r7, #16
    addpl       r3, r3, #0x10000     ; [~11]
    asr         r3, r3, #1           ; [11 |  x]
    pkhtb       r2, r3, r2, asr #17  ; [11 | 10]
    str         r2, [r1], #4

    asrs        r2, r9, #16
    addpl       r2, r2, #1           ; [~12]
    asrs        r3, r8, #16
    addpl       r3, r3, #1           ; [~13]
    lsl         r3, r3, #15          ; [13 |  x]
    pkhtb       r2, r3, r2, asr #1   ; [13 | 12]
    str         r2, [r1], #4

    lsls        r2, r8, #16
    addpl       r2, r2, #0x10000     ; [~14]
    lsls        r3, r9, #16
    addpl       r3, r3, #0x10000     ; [~15]
    asr         r3, r3, #1           ; [15 |  x]
    pkhtb       r2, r3, r2, asr #17  ; [15 | 14]
    str         r2, [r1]             ; last word, no write-back needed

    ldmia       sp!, {r4 - r11, pc}
    ENDP        ; |vp8_short_walsh4x4_armv6|