2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
12 EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
13 IMPORT |vp8_validate_buffer_arm|
15 INCLUDE vp8_asm_enc_offsets.asm
21 AREA |.text|, CODE, READONLY
23 ; macro for validating write buffer position
24 ; needs vp8_writer in r0
25 ; start shall not be in r1
;
; Usage: VALIDATE_POS <start register>, <pos register>
; Steps visible in this body:
;   1. save r0-r3, r12 and lr on the stack,
;   2. read the writer fields at vp8_writer_buffer_end and vp8_writer_error
;      (both addressed through r0) into r2 and r3,
;   3. call vp8_validate_buffer_arm.
; NOTE(review): the instructions that move $start/$pos into the call
; arguments and the restore matching the push are not visible here -
; confirm they exist before relying on r0-r3/r12/lr surviving the macro.
27 VALIDATE_POS $start, $pos
28 push {r0-r3, r12, lr} ; rest of regs are preserved by subroutine call
; r2 = writer field at offset vp8_writer_buffer_end
29 ldr r2, [r0, #vp8_writer_buffer_end]
; r3 = writer field at offset vp8_writer_error
30 ldr r3, [r0, #vp8_writer_error]
33 bl vp8_validate_buffer_arm
38 ; r1 unsigned char *cx_data
39 ; r2 const unsigned char *cx_data_end
41 ; s0 vp8_coef_encodings
43 ; s2 const vp8_tree_index *
; ---------------------------------------------------------------------------
; vp8cx_pack_tokens_into_partitions_armv5
;
; Walks the token lists partition by partition and writes each token through
; a vp8_writer (addressed via r0 and the vp8_writer_* offsets).
;
; Incoming registers as used below:
;   r1 = cx_data (saved as "ptr", later stored as the writer's buffer)
;   r2 = cx_data_end
;   r3 = num_part
;   NOTE(review): r0 is presumably the VP8_COMP pointer (cpi) on entry and is
;   later repurposed as the vp8_writer pointer - confirm.
;
; Stack slots referenced in this routine (names taken from inline comments):
;   [sp, #0]  stop                  [sp, #4]  b->tree
;   [sp, #8]  cx_data_end           [sp, #12] row counter (labelled mb_rows)
;   [sp, #16] tokenlist address     [sp, #20] num_part
;   [sp, #24] ptr                   [sp, #28] i
;   [sp, #32] cpi->tp_list cursor   [sp, #36] mb_rows
;   [sp, #80] vp8_coef_encodings    [sp, #84] vp8_extra_bits
;   [sp, #88] vp8_coef_tree
;
; Coder state kept in registers inside the loops:
;   r2 = lowvalue, r3 = count, r5 = range, r1 = p (current token),
;   r8 = n (bits left), r12 = v (left-aligned bits), lr = tree index i,
;   r9 = pp (probabilities), r10 = tree pointer (also used as a temporary).
; ---------------------------------------------------------------------------
45 |vp8cx_pack_tokens_into_partitions_armv5| PROC
49 ; Compute address of cpi->common.mb_rows
; Both loads below fetch a word stored at a label (PC-relative literal load).
50 ldr r4, _VP8_COMP_common_
51 ldr r6, _VP8_COMMON_MBrows_
54 ldr r5, [r4, r6] ; load up mb_rows
; Spill the values that must survive the loops into the stack frame.
56 str r5, [sp, #36] ; save mb_rows
57 str r1, [sp, #24] ; save ptr = cx_data
58 str r3, [sp, #20] ; save num_part
59 str r2, [sp, #8] ; save cx_data_end
61 ldr r4, _VP8_COMP_tplist_
63 ldr r7, [r4, #0] ; dereference cpi->tp_list
64 str r7, [sp, #32] ; store start of cpi->tp_list
66 ldr r11, _VP8_COMP_bc_ ; load up vp8_writer out of cpi
70 str r11, [sp, #28] ; i
; ---- per-partition setup ----
73 ldr r2, _vp8_writer_sz_ ; load up sizeof(vp8_writer)
74 add r0, r2 ; bc[i + 1]
76 ldr r10, [sp, #24] ; ptr
77 ldr r5, [sp, #36] ; move mb_rows to the counting section
; subs sets the flags that the "ble end_partition" further down tests.
78 subs r5, r5, r11 ; move start point with each partition
82 ; Reset all of the VP8 Writer data for each partition that
87 str r3, [r0, #vp8_writer_buffer_end]
; Initial coder state: lowvalue = 0, range = 255, count = -24 (mvn of 23).
89 mov r2, #0 ; vp8_writer_lowvalue
90 mov r5, #255 ; vp8_writer_range
91 mvn r3, #23 ; vp8_writer_count
93 str r2, [r0, #vp8_writer_pos]
94 str r10, [r0, #vp8_writer_buffer]
; None of the visible str/mov/mvn instructions above update the flags, so
; this branch still sees the result of the subs on mb_rows.
96 ble end_partition ; if (mb_rows <= 0) end partition
; ---- per-row setup: fetch the [start, stop) token range of this row ----
100 ldr r1, [r7, #tokenlist_start]
101 ldr r9, [r7, #tokenlist_stop]
102 str r9, [sp, #0] ; save stop for later comparison
103 str r7, [sp, #16] ; tokenlist address for next time
107 ; actual work gets done here!
; ---- per-token setup ----
110 ldrb r6, [r1, #tokenextra_token] ; t
111 ldr r4, [sp, #80] ; vp8_coef_encodings
; lsl #3: each vp8_coef_encodings entry is indexed with an 8-byte stride.
113 add r4, r4, r6, lsl #3 ; a = vp8_coef_encodings + t
114 ldr r9, [r1, #tokenextra_context_tree] ; pp
116 ldrb r7, [r1, #tokenextra_skip_eob_node]
118 ldr r6, [r4, #vp8_token_value] ; v
119 ldr r8, [r4, #vp8_token_len] ; n
121 ; vp8 specific skip_eob_node
; NOTE(review): the compare that sets the flags for this subne is not
; visible here - presumably a test of skip_eob_node (r7); confirm.
124 subne r8, r8, #1 ; --n
126 rsb r4, r8, #32 ; 32-n
127 ldr r10, [sp, #88] ; vp8_coef_tree
129 ; v is kept in r12 during the token pack loop
; Left-align v so that the next bit to encode is always bit 31 of r12.
130 lsl r12, r6, r4 ; r12 = v << 32 - n
; ---- token bit loop: encode one tree decision per iteration ----
134 ldrb r4, [r9, lr, asr #1] ; pp [i>>1]
135 sub r7, r5, #1 ; range-1
137 ; Decisions are made based on the bit value shifted
138 ; off of v, so set a flag here based on this.
139 ; This value is referred to as "bb"
; lsls moves bit 31 of r12 into the carry flag; the addcs/subcs below all
; key off that carry (mul, ldrsb and the plain add do not change it).
140 lsls r12, r12, #1 ; bb = v >> n
141 mul r6, r4, r7 ; ((range-1) * pp[i>>1]))
143 ; bb can only be 0 or 1. So only execute this statement
144 ; if bb == 1, otherwise it will act like i + 0
145 addcs lr, lr, #1 ; i + bb
148 ldrsb lr, [r10, lr] ; i = vp8_coef_tree[i+bb]
; NOTE(review): the comment expects r7 == 1 here, but the last visible
; write to r7 is range-1 - confirm r7 is set to 1 before this add.
149 add r4, r7, r6, lsr #8 ; 1 + (((range-1) * pp[i>>1]) >> 8)
151 addcs r2, r2, r4 ; if (bb) lowvalue += split
152 subcs r4, r5, r4 ; if (bb) range = range-split
154 ; Counting the leading zeros is used to normalize range.
156 sub r6, r6, #24 ; shift
158 ; Flag is set on the sum of count. This flag is used later
159 ; to determine if count >= 0
160 adds r3, r3, r6 ; count += shift
161 lsl r5, r4, r6 ; range <<= shift
; Branch taken when count is still negative: no byte is ready to be written.
162 bmi token_count_lt_zero ; if(count >= 0)
; count >= 0: a byte is ready. First check whether a carry must be
; propagated into bytes already written (bit 31 of lowvalue << (offset-1)).
164 sub r6, r6, r3 ; offset = shift - count
165 sub r4, r6, #1 ; offset-1
166 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
167 bpl token_high_bit_not_set
; Carry propagation: walk back from pos-1 through previously written bytes.
169 ldr r4, [r0, #vp8_writer_pos] ; x
170 sub r4, r4, #1 ; x = w->pos-1
171 b token_zero_while_start
172 token_zero_while_loop
174 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
176 token_zero_while_start
; NOTE(review): the compares feeding ldrge/beq are not visible here.
178 ldrge r7, [r0, #vp8_writer_buffer]
181 beq token_zero_while_loop
183 ldr r7, [r0, #vp8_writer_buffer]
184 ldrb r10, [r7, r4] ; w->buffer[x]
186 strb r10, [r7, r4] ; w->buffer[x] + 1
187 token_high_bit_not_set
; Emit one byte: r7 = lowvalue >> (24-offset), written at the old w->pos
; after w->pos has been advanced and the new position validated.
188 rsb r4, r6, #24 ; 24-offset
189 ldr r10, [r0, #vp8_writer_buffer]
190 lsr r7, r2, r4 ; lowvalue >> (24-offset)
191 ldr r4, [r0, #vp8_writer_pos] ; w->pos
192 lsl r2, r2, r6 ; lowvalue <<= offset
193 mov r6, r3 ; shift = count
194 add r11, r4, #1 ; w->pos++
195 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
196 str r11, [r0, #vp8_writer_pos]
197 sub r3, r3, #8 ; count -= 8
199 VALIDATE_POS r10, r11 ; validate_buffer at pos
201 strb r7, [r10, r4] ; w->buffer[w->pos++]
203 ; r10 is used earlier in the loop, but r10 is used as
204 ; temp variable here. So after r10 is used, reload
205 ; vp8_coef_tree_dcd into r10
206 ldr r10, [sp, #88] ; vp8_coef_tree
209 lsl r2, r2, r6 ; lowvalue <<= shift
211 subs r8, r8, #1 ; --n
; ---- extra bits: look up the vp8_extra_bits entry for this token ----
214 ldrb r6, [r1, #tokenextra_token] ; t
215 ldr r7, [sp, #84] ; vp8_extra_bits
216 ; Add t * sizeof (vp8_extra_bit_struct) to get the desired
217 ; element. Here vp8_extra_bit_struct == 16
218 add r12, r7, r6, lsl #4 ; b = vp8_extra_bits + t
220 ldr r4, [r12, #vp8_extra_bit_struct_base_val]
225 ldr r8, [r12, #vp8_extra_bit_struct_len] ; L
; Sign-extending halfword load of the token's Extra field.
226 ldrsh lr, [r1, #tokenextra_extra] ; e = p->Extra
230 ldr r9, [r12, #vp8_extra_bit_struct_prob]
231 asr r7, lr, #1 ; v=e>>1
; b->tree is parked at [sp, #4] because r10 is reused as a temporary while
; a byte is written; it is reloaded from there after each write.
233 ldr r10, [r12, #vp8_extra_bit_struct_tree]
234 str r10, [sp, #4] ; b->tree
; ---- extra bits loop: same encode step as the token loop, on b->tree ----
242 ldrb r4, [r9, lr, asr #1] ; pp[i>>1]
243 sub r7, r5, #1 ; range-1
; Carry flag = bit shifted out of r12; used by the addcs/subcs below.
244 lsls r12, r12, #1 ; v >> n
245 mul r6, r4, r7 ; (range-1) * pp[i>>1]
246 addcs lr, lr, #1 ; i + bb
249 ldrsb lr, [r10, lr] ; i = b->tree[i+bb]
; NOTE(review): as in the token loop, the comment expects r7 == 1 here while
; the last visible write to r7 is range-1 - confirm.
250 add r4, r7, r6, lsr #8 ; split = 1 + (((range-1) * pp[i>>1]) >> 8)
252 addcs r2, r2, r4 ; if (bb) lowvalue += split
253 subcs r4, r5, r4 ; if (bb) range = range-split
258 adds r3, r3, r6 ; count += shift
259 lsl r5, r4, r6 ; range <<= shift
; Branch taken when count is still negative: no byte is ready to be written.
260 bmi extra_count_lt_zero ; if(count >= 0)
262 sub r6, r6, r3 ; offset= shift - count
263 sub r4, r6, #1 ; offset-1
264 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
265 bpl extra_high_bit_not_set
; Carry propagation into already written bytes, starting at pos-1.
267 ldr r4, [r0, #vp8_writer_pos] ; x
268 sub r4, r4, #1 ; x = w->pos - 1
269 b extra_zero_while_start
270 extra_zero_while_loop
272 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
274 extra_zero_while_start
; NOTE(review): the compares feeding ldrge/beq are not visible here.
276 ldrge r7, [r0, #vp8_writer_buffer]
279 beq extra_zero_while_loop
281 ldr r7, [r0, #vp8_writer_buffer]
285 extra_high_bit_not_set
; Emit one byte, exactly as in the token loop.
286 rsb r4, r6, #24 ; 24-offset
287 ldr r10, [r0, #vp8_writer_buffer]
288 lsr r7, r2, r4 ; lowvalue >> (24-offset)
289 ldr r4, [r0, #vp8_writer_pos]
290 lsl r2, r2, r6 ; lowvalue <<= offset
291 mov r6, r3 ; shift = count
292 add r11, r4, #1 ; w->pos++
293 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
294 str r11, [r0, #vp8_writer_pos]
295 sub r3, r3, #8 ; count -= 8
297 VALIDATE_POS r10, r11 ; validate_buffer at pos
299 strb r7, [r10, r4] ; w->buffer[w->pos++]=(lowvalue >> (24-offset))
; r10 was used as the buffer pointer above; restore the tree pointer.
300 ldr r10, [sp, #4] ; b->tree
304 subs r8, r8, #1 ; --n
305 bne extra_bits_loop ; while (n)
; ---- final bit of the extra value, encoded with split = (range + 1) >> 1 ----
; NOTE(review): this read uses the literal offset 4 and a word load, while
; the earlier read of p->Extra uses ldrsh with #tokenextra_extra - confirm
; both address the same field and that the width difference is harmless.
308 ldr lr, [r1, #4] ; e = p->Extra
309 add r4, r5, #1 ; range + 1
311 lsr r4, r4, #1 ; split = (range + 1) >> 1
; NOTE(review): the test that sets the flags for addne/subne is not visible.
312 addne r2, r2, r4 ; lowvalue += split
313 subne r4, r5, r4 ; range = range-split
314 tst r2, #0x80000000 ; lowvalue & 0x80000000
315 lsl r5, r4, #1 ; range <<= 1
; lsl without the S suffix keeps the flags from tst, so beq tests bit 31
; of lowvalue.
316 beq end_high_bit_not_set
318 ldr r4, [r0, #vp8_writer_pos]
321 b end_zero_while_start
327 ldrge r6, [r0, #vp8_writer_buffer]
330 beq end_zero_while_loop
332 ldr r6, [r0, #vp8_writer_buffer]
337 adds r3, r3, #1 ; ++count
338 lsl r2, r2, #1 ; lowvalue <<= 1
; Byte output for this path: count is reset to -8 and the top byte of
; lowvalue (bits 31..24) is extracted into r6.
341 ldr r4, [r0, #vp8_writer_pos]
342 mvn r3, #7 ; count = -8
343 ldr r7, [r0, #vp8_writer_buffer]
344 lsr r6, r2, #24 ; lowvalue >> 24
345 add r12, r4, #1 ; w->pos++
346 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
347 str r12, [r0, #vp8_writer_pos]
349 VALIDATE_POS r7, r12 ; validate_buffer at pos
; ---- advance to the next token of this row ----
354 add r1, r1, #TOKENEXTRA_SZ ; ++p
356 ldr r4, [sp, #0] ; stop
357 cmp r1, r4 ; while( p < stop)
; ---- advance to the next row handled by this partition ----
360 ldr r10, [sp, #20] ; num_parts
361 mov r1, #TOKENLIST_SZ
; NOTE(review): no store to [sp, #12] is visible in this routine; per the
; comment it holds a row count - presumably the rows remaining for this
; partition. Confirm where it is initialised and updated.
364 ldr r6, [sp, #12] ; mb_rows
365 ldr r7, [sp, #16] ; tokenlist address
367 add r7, r7, r1 ; next element in the array
; ---- encode step with a fixed probability of 128 (range-1 shifted by 7) ----
375 sub r7, r5, #1 ; range-1
377 mov r4, r7, lsl #7 ; ((range-1) * 128)
; NOTE(review): the comment expects r7 == 1 here while the last visible
; write to r7 is range-1 - confirm.
380 add r4, r7, r4, lsr #8 ; 1 + (((range-1) * 128) >> 8)
382 ; Counting the leading zeros is used to normalize range.
384 sub r6, r6, #24 ; shift
386 ; Flag is set on the sum of count. This flag is used later
387 ; to determine if count >= 0
388 adds r3, r3, r6 ; count += shift
389 lsl r5, r4, r6 ; range <<= shift
; Branch taken when count is still negative: no byte is ready to be written.
390 bmi token_count_lt_zero_se ; if(count >= 0)
392 sub r6, r6, r3 ; offset = shift - count
393 sub r4, r6, #1 ; offset-1
394 lsls r4, r2, r4 ; if((lowvalue<<(offset-1)) & 0x80000000 )
395 bpl token_high_bit_not_set_se
; Carry propagation into already written bytes, starting at pos-1.
397 ldr r4, [r0, #vp8_writer_pos] ; x
398 sub r4, r4, #1 ; x = w->pos-1
399 b token_zero_while_start_se
400 token_zero_while_loop_se
402 strb r10, [r7, r4] ; w->buffer[x] =(unsigned char)0
404 token_zero_while_start_se
; NOTE(review): the compares feeding ldrge/beq are not visible here.
406 ldrge r7, [r0, #vp8_writer_buffer]
409 beq token_zero_while_loop_se
411 ldr r7, [r0, #vp8_writer_buffer]
412 ldrb r10, [r7, r4] ; w->buffer[x]
414 strb r10, [r7, r4] ; w->buffer[x] + 1
415 token_high_bit_not_set_se
; Emit one byte, exactly as in the token loop.
416 rsb r4, r6, #24 ; 24-offset
417 ldr r10, [r0, #vp8_writer_buffer]
418 lsr r7, r2, r4 ; lowvalue >> (24-offset)
419 ldr r4, [r0, #vp8_writer_pos] ; w->pos
420 lsl r2, r2, r6 ; lowvalue <<= offset
421 mov r6, r3 ; shift = count
422 add r11, r4, #1 ; w->pos++
423 bic r2, r2, #0xff000000 ; lowvalue &= 0xffffff
424 str r11, [r0, #vp8_writer_pos]
425 sub r3, r3, #8 ; count -= 8
427 VALIDATE_POS r10, r11 ; validate_buffer at pos
429 strb r7, [r10, r4] ; w->buffer[w->pos++]
431 token_count_lt_zero_se
432 lsl r2, r2, r6 ; lowvalue <<= shift
; ---- end of partition: advance the output pointer by the bytes written ----
437 ldr r4, [r0, #vp8_writer_pos] ; w->pos
438 ldr r12, [sp, #24] ; ptr
439 add r12, r12, r4 ; ptr += w->pos
442 ldr r11, [sp, #28] ; i
443 ldr r10, [sp, #20] ; num_parts
445 add r11, r11, #1 ; i++
; Move the saved cpi->tp_list cursor on by one TOKENLIST_SZ element so the
; next partition starts one token list later.
448 ldr r7, [sp, #32] ; cpi->tp_list[i]
449 mov r1, #TOKENLIST_SZ
450 add r7, r7, r1 ; next element in cpi->tp_list
451 str r7, [sp, #32] ; cpi->tp_list[i+1]
463 DCD vp8_common_mb_rows