2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
; Make both predictor entry points visible to the linker.
12 EXPORT |vp8_build_intra_predictors_mby_neon_func|
13 EXPORT |vp8_build_intra_predictors_mby_s_neon_func|
; Everything below goes into a read-only code area.
; ALIGN=2 is a power-of-two exponent, i.e. the area is aligned to 2^2 = 4 bytes.
19 AREA ||.text||, CODE, READONLY, ALIGN=2
20 ; r0 unsigned char *y_buffer
21 ; r1 unsigned char *ypred_ptr
; vp8_build_intra_predictors_mby_neon_func
; NEON routine for building a luma (Y) macroblock intra predictor.
; r0 and r1 carry y_buffer and ypred_ptr (see the argument comments just above);
; two further values, labelled Up and Left, are read from the stack.
; NOTE(review): the roles of r2 and r3 are not documented next to this routine -
; confirm them against the C caller.
27 |vp8_build_intra_predictors_mby_neon_func| PROC
; Fetch the two stack-resident values.
; NOTE(review): offsets 88 and 92 presumably step over a register save area
; created by the prologue - confirm they match what the prologue pushes.
41 ldr r4, [sp, #88] ; Up
42 ldr r5, [sp, #92] ; Left
44 ; Default the DC average to 128
48 ; Zero out running sum
51 ; compute shift and jump
; Taken when the preceding flag-setting instruction produced zero.
; NOTE(review): presumably the case where neither Up nor Left is available, so
; the default average is used as-is - confirm.
53 beq skip_dc_pred_up_left
55 ; Load above row, if it exists
70 ; Move back to integer registers
79 ; Load left row, if it exists
121 add r7, r7, #3 ; Shift
; r12 = r12 + (r5 << r4), then r5 = r12 >> r7 (logical shift right).
; NOTE(review): the added term looks like a rounding bias applied before the
; final shift; r4 and r5 appear to be reused as scratch here - confirm what they
; hold at this point.
124 add r12, r12, r5, lsl r4
125 mov r5, r12, lsr r7 ; expected_dc
150 ; Copy down above row
244 ; Compute yabove_row - ytop_left
; Saturating signed 16-bit adds: q1, q2 and q3 are each combined with q4 and
; with q5, producing the six sums q10..q15.
; NOTE(review): q4/q5 presumably hold the two halves of (above - top_left) and
; q1..q3 per-row left-pixel values - confirm.
271 vqadd.s16 q10, q1, q4
272 vqadd.s16 q11, q1, q5
274 vqadd.s16 q12, q2, q4
275 vqadd.s16 q13, q2, q5
277 vqadd.s16 q14, q3, q4
278 vqadd.s16 q15, q3, q5
; Narrow every signed 16-bit lane to an unsigned 8-bit lane with saturation.
; The shift amount is 0, so this only clamps each value to 0..255.
; Destination pairs form full Q registers: d0:d1 = q0, d2:d3 = q1,
; d4:d5 = q2, d6:d7 = q3.
; NOTE(review): q8/q9 must already hold the first pair of sums here - confirm.
280 vqshrun.s16 d0, q8, #0
281 vqshrun.s16 d1, q9, #0
283 vqshrun.s16 d2, q10, #0
284 vqshrun.s16 d3, q11, #0
286 vqshrun.s16 d4, q12, #0
287 vqshrun.s16 d5, q13, #0
289 vqshrun.s16 d6, q14, #0
290 vqshrun.s16 d7, q15, #0
; Repeat the loop while the preceding flag-setting result is non-zero.
298 bne case_tm_pred_loop
305 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
306 ; r0 unsigned char *y_buffer
307 ; r1 unsigned char *ypred_ptr
; vp8_build_intra_predictors_mby_s_neon_func
; Variant of the luma intra predictor that writes its output through r1 after
; r1 has been set equal to r0 (y_buffer), i.e. the prediction is stored straight
; into the buffer addressed by r0.
; Every row store in this routine is post-indexed: r1 advances by r2 after each
; 16-byte write.
; NOTE(review): r2 is presumably the row stride in bytes - confirm against the
; C caller.
313 |vp8_build_intra_predictors_mby_s_neon_func| PROC
317 mov r1, r0 ; unsigned char *ypred_ptr = x->dst.y_buffer; //x->Predictor;
; Fetch the two stack-resident values.
; NOTE(review): offsets 88 and 92 presumably step over a register save area
; created by the prologue - confirm they match what the prologue pushes.
329 ldr r4, [sp, #88] ; Up
330 ldr r5, [sp, #92] ; Left
332 ; Default the DC average to 128
336 ; Zero out running sum
339 ; compute shift and jump
; Taken when the preceding flag-setting instruction produced zero.
; NOTE(review): presumably the case where neither Up nor Left is available - confirm.
341 beq skip_dc_pred_up_left_s
343 ; Load above row, if it exists
; Taken on a zero result; presumably bypasses the above-row sum - confirm.
345 beq skip_dc_pred_up_s
358 ; Move back to integer registers
; Taken on a zero result; presumably bypasses the left-column sum - confirm.
363 beq skip_dc_pred_left_s
367 ; Load left row, if it exists
409 add r7, r7, #3 ; Shift
; r12 = r12 + (r5 << r4), then r5 = r12 >> r7 (logical shift right).
; NOTE(review): the added term looks like a rounding bias applied before the
; final shift; r4 and r5 appear to be reused as scratch here - confirm what they
; hold at this point.
412 add r12, r12, r5, lsl r4
413 mov r5, r12, lsr r7 ; expected_dc
417 skip_dc_pred_up_left_s
; Store the same 16 bytes (q0) to 16 consecutive rows.
; NOTE(review): q0 presumably holds the DC value replicated in every lane - confirm.
418 vst1.u8 {q0}, [r1], r2
419 vst1.u8 {q0}, [r1], r2
420 vst1.u8 {q0}, [r1], r2
421 vst1.u8 {q0}, [r1], r2
422 vst1.u8 {q0}, [r1], r2
423 vst1.u8 {q0}, [r1], r2
424 vst1.u8 {q0}, [r1], r2
425 vst1.u8 {q0}, [r1], r2
426 vst1.u8 {q0}, [r1], r2
427 vst1.u8 {q0}, [r1], r2
428 vst1.u8 {q0}, [r1], r2
429 vst1.u8 {q0}, [r1], r2
430 vst1.u8 {q0}, [r1], r2
431 vst1.u8 {q0}, [r1], r2
432 vst1.u8 {q0}, [r1], r2
433 vst1.u8 {q0}, [r1], r2
438 ; Copy down above row
; Again one Q register (q0) is written to 16 consecutive rows.
442 vst1.u8 {q0}, [r1], r2
443 vst1.u8 {q0}, [r1], r2
444 vst1.u8 {q0}, [r1], r2
445 vst1.u8 {q0}, [r1], r2
446 vst1.u8 {q0}, [r1], r2
447 vst1.u8 {q0}, [r1], r2
448 vst1.u8 {q0}, [r1], r2
449 vst1.u8 {q0}, [r1], r2
450 vst1.u8 {q0}, [r1], r2
451 vst1.u8 {q0}, [r1], r2
452 vst1.u8 {q0}, [r1], r2
453 vst1.u8 {q0}, [r1], r2
454 vst1.u8 {q0}, [r1], r2
455 vst1.u8 {q0}, [r1], r2
456 vst1.u8 {q0}, [r1], r2
457 vst1.u8 {q0}, [r1], r2
; Four groups of four row stores (16 rows total); each group writes q0..q3 in order.
; NOTE(review): presumably each of q0..q3 holds one left-column pixel replicated
; across the row and is refilled between groups - confirm.
474 vst1.u8 {q0}, [r1], r2
475 vst1.u8 {q1}, [r1], r2
476 vst1.u8 {q2}, [r1], r2
477 vst1.u8 {q3}, [r1], r2
487 vst1.u8 {q0}, [r1], r2
488 vst1.u8 {q1}, [r1], r2
489 vst1.u8 {q2}, [r1], r2
490 vst1.u8 {q3}, [r1], r2
501 vst1.u8 {q0}, [r1], r2
502 vst1.u8 {q1}, [r1], r2
503 vst1.u8 {q2}, [r1], r2
504 vst1.u8 {q3}, [r1], r2
514 vst1.u8 {q0}, [r1], r2
515 vst1.u8 {q1}, [r1], r2
516 vst1.u8 {q2}, [r1], r2
517 vst1.u8 {q3}, [r1], r2
533 ; Compute yabove_row - ytop_left
; Saturating signed 16-bit adds: q1, q2 and q3 are each combined with q4 and
; with q5, producing the six sums q10..q15.
; NOTE(review): q4/q5 presumably hold the two halves of (above - top_left) and
; q1..q3 per-row left-pixel values - confirm.
560 vqadd.s16 q10, q1, q4
561 vqadd.s16 q11, q1, q5
563 vqadd.s16 q12, q2, q4
564 vqadd.s16 q13, q2, q5
566 vqadd.s16 q14, q3, q4
567 vqadd.s16 q15, q3, q5
; Narrow every signed 16-bit lane to an unsigned 8-bit lane with saturation.
; The shift amount is 0, so this only clamps each value to 0..255.
; Destination pairs form full Q registers: d0:d1 = q0, d2:d3 = q1,
; d4:d5 = q2, d6:d7 = q3.
; NOTE(review): q8/q9 must already hold the first pair of sums here - confirm.
569 vqshrun.s16 d0, q8, #0
570 vqshrun.s16 d1, q9, #0
572 vqshrun.s16 d2, q10, #0
573 vqshrun.s16 d3, q11, #0
575 vqshrun.s16 d4, q12, #0
576 vqshrun.s16 d5, q13, #0
578 vqshrun.s16 d6, q14, #0
579 vqshrun.s16 d7, q15, #0
; Write the four clamped rows, advancing r1 by r2 after each one.
581 vst1.u8 {q0}, [r1], r2
582 vst1.u8 {q1}, [r1], r2
583 vst1.u8 {q2}, [r1], r2
584 vst1.u8 {q3}, [r1], r2
; Repeat the loop while the preceding flag-setting result is non-zero.
587 bne case_tm_pred_loop_s