;================================================
|vp8_variance_halfpixvar16x16_h_neon| PROC
; Only the prologue, accumulator setup and epilogue of this routine are
; visible here; the filtering/accumulation loop is not shown.
; Stack layout: the prologue pushes lr (4 bytes) and d8-d15 (8 x 8 = 64
; bytes), so caller-supplied stack arguments sit 68 bytes further from sp
; than they did at entry.
; d8-d15 are callee-saved under the AAPCS; this routine uses d10 (see the
; final vsub), which lies in that range.
; Returns in r0 the low 32-bit lane of (d1 - d10).
push {lr}
+ vpush {d8-d15}
mov r12, #4 ;loop counter
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack (skips 4 bytes of lr + 64 bytes of d8-d15)
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vmov.i8 q10, #0
vsub.u32 d0, d1, d10 ;d0 = d1 - d10, per 32-bit lane
vmov.32 r0, d0[0] ;return lane 0 of d0 in r0
+
+ vpop {d8-d15}
pop {pc}
ENDP
;================================================
|vp8_variance_halfpixvar16x16_v_neon| PROC
; Only the prologue, first source load, accumulator setup and epilogue of
; this routine are visible here; the main loop is not shown.
; Stack layout: the prologue pushes lr (4 bytes) and d8-d15 (8 x 8 = 64
; bytes), so caller-supplied stack arguments sit 68 bytes further from sp
; than they did at entry.
; d8-d15 are callee-saved under the AAPCS; this routine uses d10 (see the
; final vsub), which lies in that range.
; Returns in r0 the low 32-bit lane of (d1 - d10).
push {lr}
+ vpush {d8-d15}
mov r12, #4 ;loop counter
vld1.u8 {q0}, [r0], r1 ;load 16 bytes of src data, then r0 += r1
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack (skips 4 bytes of lr + 64 bytes of d8-d15)
vmov.i8 q8, #0 ;q8 - sum
vmov.i8 q9, #0 ;q9, q10 - sse
vsub.u32 d0, d1, d10 ;d0 = d1 - d10, per 32-bit lane
vmov.32 r0, d0[0] ;return lane 0 of d0 in r0
+
+ vpop {d8-d15}
pop {pc}
ENDP
;================================================
|vp8_variance_halfpixvar16x16_hv_neon| PROC
; Only the prologue, first source load and epilogue of this routine are
; visible here; the main loop is not shown.
; Stack layout: the prologue pushes lr (4 bytes) and d8-d15 (8 x 8 = 64
; bytes), so caller-supplied stack arguments sit 68 bytes further from sp
; than they did at entry.
; d8-d15 are callee-saved under the AAPCS; this routine uses d10 (see the
; final vsub), which lies in that range.
; Returns in r0 the low 32-bit lane of (d1 - d10).
push {lr}
+ vpush {d8-d15}
vld1.u8 {d0, d1, d2, d3}, [r0], r1 ;load 32 bytes of src data into q0:q1, then r0 += r1
- ldr lr, [sp, #4] ;load *sse from stack
+ ldr lr, [sp, #68] ;load *sse from stack (skips 4 bytes of lr + 64 bytes of d8-d15)
vmov.i8 q13, #0 ;zero q13 (comment used to read "q8 - sum"; presumably q13 is the sum accumulator here - TODO confirm)
vext.8 q1, q0, q1, #1 ;construct src_ptr[1]: bytes 1..16 of the q0:q1 pair
vsub.u32 d0, d1, d10 ;d0 = d1 - d10, per 32-bit lane
vmov.32 r0, d0[0] ;return lane 0 of d0 in r0
+
+ vpop {d8-d15}
pop {pc}
ENDP
|vp8_sub_pixel_variance16x16s_neon| PROC
; Only the prologue, argument loads, first dispatch test and epilogue of
; this routine are visible here; the filter passes are not shown.
; Stack layout: the prologue pushes r4 and lr (8 bytes) and d8-d15
; (8 x 8 = 64 bytes), so caller-supplied stack arguments sit 72 bytes
; further from sp than they did at entry.
; d8-d15 are callee-saved under the AAPCS, hence the vpush/vpop pair.
; Stack arguments are read into r4 (*dst_ptr), r12 (dst_pixels_per_line)
; and lr (*sse) before any further change to sp is visible.
; r2 is tested as xoffset; the result is returned in r0 from lane 0 of d0.
push {r4, lr}
+ vpush {d8-d15}
- ldr r4, [sp, #8] ;load *dst_ptr from stack
- ldr r12, [sp, #12] ;load dst_pixels_per_line from stack
- ldr lr, [sp, #16] ;load *sse from stack
+ ldr r4, [sp, #72] ;load *dst_ptr from stack (8 bytes of r4/lr + 64 bytes of d8-d15 below it)
+ ldr r12, [sp, #76] ;load dst_pixels_per_line from stack
+ ldr lr, [sp, #80] ;load *sse from stack
cmp r2, #0 ;skip first_pass filter if xoffset=0
beq secondpass_bfilter16x16s_only ;target label is outside the visible excerpt
add sp, sp, #256 ;release 256 bytes of stack (matching allocation not visible here); must precede vpop
vmov.32 r0, d0[0] ;return lane 0 of d0 in r0
+ vpop {d8-d15}
pop {r4, pc}
ENDP