.endm
.macro function name, export=0
/* endfunc: function terminator. It is defined inside the body of the
 * `function` macro, so \name below is the name given to that `function`
 * invocation. */
+ .macro endfunc
/* Record the symbol size of \name as the distance from \name to here. */
+ .size \name, . - \name
+ .endfunc
/* Remove this definition after use; the enclosing `function` macro defines
 * it again on its next expansion. */
+ .purgem endfunc
+ .endm
.if \export
.global EXTERN_ASM\name
EXTERN_ASM\name:
add r0, r0, r1
bne ff_prefetch_arm
bx lr
- .endfunc
+endfunc
#endif
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
add r0, r0, r2
bne 4b
pop {r4-r11,pc}
- .endfunc
+endfunc
@ ----------------------------------------------------------------
.align 5
add r0, r0, r2
bne 4b
pop {r4-r5,pc}
- .endfunc
+endfunc
@ ----------------------------------------------------------------
.align 5
add r0, r0, r2
bne 4b
pop {r4-r10,pc}
- .endfunc
+endfunc
.align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
add r0, r0, r2
bne 4b
pop {r4-r10,pc}
- .endfunc
+endfunc
@ ----------------------------------------------------------------
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
- .endfunc
+endfunc
.align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
add r0, r0, r2
bne 6b
pop {r4-r11,pc}
- .endfunc
+endfunc
.ltorg
3: RND_XY2_EXPAND 2, lsl
.align 5
4: RND_XY2_EXPAND 3, lsl
- .endfunc
+endfunc
.align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
3: RND_XY2_EXPAND 2, lsr
.align 5
4: RND_XY2_EXPAND 3, lsr
- .endfunc
+endfunc
.align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
pop {r4-r10}
bx lr
- .endfunc
+endfunc
add r0, r0, #8
add r1, r1, #8
b ff_\type\()_pixels8\subp\()_armv6
-.endfunc
+endfunc
.endm
call_2x_pixels avg
pop {r4-r11}
bx lr
-.endfunc
+endfunc
function ff_put_pixels8_armv6, export=1
push {r4-r7}
pop {r4-r7}
bx lr
-.endfunc
+endfunc
function ff_put_pixels8_x2_armv6, export=1
push {r4-r11, lr}
bne 1b
pop {r4-r11, pc}
-.endfunc
+endfunc
function ff_put_pixels8_y2_armv6, export=1
push {r4-r11}
pop {r4-r11}
bx lr
-.endfunc
+endfunc
function ff_put_pixels8_x2_no_rnd_armv6, export=1
push {r4-r9, lr}
bne 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr}
bne 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_avg_pixels8_armv6, export=1
pld [r1, r2]
strd r6, r7, [r0], r2
pop {r4-r10, pc}
-.endfunc
+endfunc
function ff_add_pixels_clamped_armv6, export=1
push {r4-r8,lr}
strd r6, r7, [r1], r2
bgt 1b
pop {r4-r8,pc}
-.endfunc
+endfunc
function ff_get_pixels_armv6, export=1
pld [r1, r2]
bgt 1b
pop {r4-r8, pc}
-.endfunc
+endfunc
function ff_diff_pixels_armv6, export=1
pld [r1, r3]
bgt 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_pix_abs16_armv6, export=1
ldr r0, [sp]
2:
add r0, r12, lr
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_pix_abs16_x2_armv6, export=1
ldr r12, [sp]
bgt 1b
pop {r4-r11, pc}
-.endfunc
+endfunc
.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
ldr \n0, [r2]
bgt 1b
pop {r4-r11, pc}
-.endfunc
+endfunc
function ff_pix_abs8_armv6, export=1
pld [r2, r3]
usada8 lr, r9, r7, lr
add r0, r0, lr
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_sse16_armv6, export=1
ldr r12, [sp]
bgt 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_pix_norm1_armv6, export=1
push {r4-r6, lr}
mov r0, lr
pop {r4-r6, pc}
-.endfunc
+endfunc
function ff_pix_sum_armv6, export=1
push {r4-r7, lr}
usada8 r3, r7, lr, r3
add r0, r2, r3
pop {r4-r7, pc}
-.endfunc
+endfunc
/* pixfunc: emit an exported function named ff_<pfx><name><suf>_neon whose
 * body is the single statement `<name> <rnd_op> <args>`.
 * <args> is a vararg and collects everything after rnd_op.
 * NOTE(review): <name> is presumably a macro defined elsewhere in the file
 * (e.g. pixels16); it is not visible in this excerpt. */
.macro pixfunc pfx name suf rnd_op args:vararg
function ff_\pfx\name\suf\()_neon, export=1
/* Expand the statement whose mnemonic was passed in \name. */
\name \rnd_op \args
- .endfunc
+endfunc
.endm
.macro pixfunc2 pfx name args:vararg
/* Exported entry point whose only visible instruction loads 16 into r3.
 * No return or branch is visible before the function terminator, so as
 * shown execution continues into whatever is assembled next.
 * NOTE(review): confirm the intended fall-through target. */
function ff_put_h264_qpel16_mc00_neon, export=1
mov r3, #16
- .endfunc
+endfunc
pixfunc put_ pixels16
pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
/* Exported entry point whose only visible instruction loads 16 into r3.
 * No return or branch is visible before the function terminator, so as
 * shown execution continues into whatever is assembled next.
 * NOTE(review): confirm the intended fall-through target. */
function ff_avg_h264_qpel16_mc00_neon, export=1
mov r3, #16
- .endfunc
+endfunc
pixfunc avg_ pixels16,, 1
/* Exported entry point whose only visible instruction loads 8 into r3.
 * No return or branch is visible before the function terminator, so as
 * shown execution continues into whatever is assembled next.
 * NOTE(review): confirm the intended fall-through target. */
function ff_put_h264_qpel8_mc00_neon, export=1
mov r3, #8
- .endfunc
+endfunc
pixfunc put_ pixels8
pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
/* Exported entry point whose only visible instruction loads 8 into r3.
 * No return or branch is visible before the function terminator, so as
 * shown execution continues into whatever is assembled next.
 * NOTE(review): confirm the intended fall-through target. */
function ff_avg_h264_qpel8_mc00_neon, export=1
mov r3, #8
- .endfunc
+endfunc
pixfunc avg_ pixels8,, 1
vst1.64 {d6}, [r1,:64], r2
vst1.64 {d7}, [r1,:64], r2
bx lr
- .endfunc
+endfunc
function ff_put_signed_pixels_clamped_neon, export=1
vmov.u8 d31, #128
vst1.64 {d6}, [r1,:64], r2
vst1.64 {d7}, [r1,:64], r2
bx lr
- .endfunc
+endfunc
function ff_add_pixels_clamped_neon, export=1
mov r3, r1
vst1.64 {d4}, [r3,:64], r2
vst1.64 {d6}, [r3,:64], r2
bx lr
- .endfunc
+endfunc
function ff_float_to_int16_neon, export=1
subs r2, r2, #8
vshrn.s32 d5, q9, #16
vst1.64 {d4-d5}, [r0,:128]!
bx lr
- .endfunc
+endfunc
function ff_float_to_int16_interleave_neon, export=1
cmp r3, #2
vld1.64 {d2-d3}, [r4,:128]!
vcvt.s32.f32 q1, q1, #16
b 6b
- .endfunc
+endfunc
function ff_vector_fmul_neon, export=1
mov r3, r0
vmul.f32 q9, q1, q3
3: vst1.64 {d16-d19},[r3,:128]!
bx lr
- .endfunc
+endfunc
function ff_vector_fmul_window_neon, export=1
VFP vdup.32 q8, d0[0]
vst1.64 {d20,d21},[r0,:128]!
vst1.64 {d22,d23},[ip,:128], r5
pop {r4,r5,pc}
- .endfunc
+endfunc
#if CONFIG_VORBIS_DECODER
function ff_vorbis_inverse_coupling_neon, export=1
vst1.32 {d2-d3}, [r0,:128]!
vst1.32 {d0-d1}, [r1,:128]!
bx lr
- .endfunc
+endfunc
#endif
function ff_vector_fmul_scalar_neon, export=1
bgt 3b
bx lr
.unreq len
- .endfunc
+endfunc
function ff_vector_fmul_sv_scalar_2_neon, export=1
VFP vdup.32 d16, d0[0]
2: vst1.32 {d4},[r0,:64]!
vst1.32 {d5},[r0,:64]!
bx lr
- .endfunc
+endfunc
function ff_vector_fmul_sv_scalar_4_neon, export=1
VFP vdup.32 q10, d0[0]
subs r3, r3, #4
bgt 3b
pop {pc}
- .endfunc
+endfunc
function ff_sv_fmul_scalar_2_neon, export=1
VFP len .req r2
2: vst1.32 {q1},[r0,:128]!
bx lr
.unreq len
- .endfunc
+endfunc
function ff_sv_fmul_scalar_4_neon, export=1
VFP len .req r2
bgt 1b
bx lr
.unreq len
- .endfunc
+endfunc
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
subs r2, r2, #4
bgt 1b
bx lr
- .endfunc
+endfunc
function ff_scalarproduct_float_neon, export=1
vmov.f32 q2, #0.0
vpadd.f32 d0, d0, d0
NOVFP vmov.32 r0, d0[0]
bx lr
- .endfunc
+endfunc
function ff_int32_to_float_fmul_scalar_neon, export=1
VFP vdup.32 q0, d0[0]
vst1.32 {q10},[r0,:128]!
bx lr
.unreq len
- .endfunc
+endfunc
function ff_vector_fmul_reverse_neon, export=1
add r2, r2, r3, lsl #2
b 1b
2: vst1.32 {q8-q9}, [r0,:128]!
bx lr
- .endfunc
+endfunc
function ff_vector_fmul_add_neon, export=1
ldr r12, [sp]
b 1b
2: vst1.32 {q12-q13},[r0,:128]!
bx lr
- .endfunc
+endfunc
function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1]
2: vst1.f32 {q8},[r0,:128]!
vst1.f32 {q9},[r0,:128]!
bx lr
- .endfunc
+endfunc
fmxr fpscr, r12
vpop {d8-d15}
bx lr
- .endfunc
+endfunc
/**
* ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
vpop {d8-d15}
bx lr
- .endfunc
+endfunc
#if HAVE_ARMV6
/**
vpop {d8-d11}
pop {r4-r8,pc}
- .endfunc
+endfunc
#endif
vst1.32 {d0-d3}, [r0,:128]
bx lr
-.endfunc
+endfunc
function fft8_neon
mov r1, r0
vst1.32 {d0-d3}, [r0,:128]
bx lr
-.endfunc
+endfunc
function fft16_neon
movrel r1, mppm
vst2.32 {d26-d27},[r0,:128], r1
vst2.32 {d30-d31},[r0,:128]
bx lr
-.endfunc
+endfunc
function fft_pass_neon
push {r4-r6,lr}
bne 1b
pop {r4-r6,pc}
-.endfunc
+endfunc
.macro def_fft n, n2, n4
.align 6
movrel r1, X(ff_cos_\n)
mov r2, #\n4/2
b fft_pass_neon
-.endfunc
+endfunc
.endm
def_fft 32, 16, 8
ldr r3, [r3, r2, lsl #2]
mov r0, r1
bx r3
-.endfunc
+endfunc
function ff_fft_permute_neon, export=1
push {r4,lr}
bgt 1b
pop {r4,pc}
-.endfunc
+endfunc
.section .rodata
.align 4
bgt 5b
pop {r4-r7, pc}
- .endfunc
+endfunc
.endm
/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
bgt 5b
pop {r4-r7, pc}
- .endfunc
+endfunc
.endm
.macro h264_chroma_mc2 type
subs r3, r3, #2
bgt 2b
pop {r4-r6, pc}
- .endfunc
+endfunc
.endm
.text
align_pop_regs
bx lr
- .endfunc
+endfunc
function ff_h264_h_loop_filter_luma_neon, export=1
h264_loop_filter_start
align_pop_regs
bx lr
- .endfunc
+endfunc
.macro h264_loop_filter_chroma
vdup.8 d22, r2 @ alpha
vst1.64 {d0}, [r0,:64], r1
bx lr
- .endfunc
+endfunc
function ff_h264_h_loop_filter_chroma_neon, export=1
h264_loop_filter_start
vst1.32 {d2[1]}, [r0], r1
bx lr
- .endfunc
+endfunc
/* H.264 qpel MC */
mov ip, #16
mov lr, r4
b put_h264_qpel8_h_lowpass_neon
- .endfunc
+endfunc
.macro h264_qpel_h_lowpass type
function \type\()_h264_qpel16_h_lowpass_neon
add r1, r1, #8
mov ip, #16
pop {lr}
- .endfunc
+endfunc
function \type\()_h264_qpel8_h_lowpass_neon
1: vld1.64 {d0, d1}, [r1], r2
vst1.64 {d16}, [r0,:64], r3
bne 1b
bx lr
- .endfunc
+endfunc
.endm
h264_qpel_h_lowpass put
add r3, r3, #8
mov ip, #16
pop {lr}
- .endfunc
+endfunc
function \type\()_h264_qpel8_h_lowpass_l2_neon
1: vld1.64 {d0, d1}, [r1], r2
vst1.64 {d1}, [r0,:64], r2
bne 1b
bx lr
- .endfunc
+endfunc
.endm
h264_qpel_h_lowpass_l2 put
sub r1, r1, r3, lsl #2
mov lr, r4
b put_h264_qpel8_v_lowpass_neon
- .endfunc
+endfunc
.macro h264_qpel_v_lowpass type
function \type\()_h264_qpel16_v_lowpass_neon
bl \type\()_h264_qpel8_v_lowpass_neon
sub r1, r1, r3, lsl #2
mov lr, r4
- .endfunc
+endfunc
function \type\()_h264_qpel8_v_lowpass_neon
vld1.64 {d8}, [r1], r3
vst1.64 {d28}, [r0,:64], r2
bx lr
- .endfunc
+endfunc
.endm
h264_qpel_v_lowpass put
bl \type\()_h264_qpel8_v_lowpass_l2_neon
sub r1, r1, r3, lsl #2
mov lr, r4
- .endfunc
+endfunc
function \type\()_h264_qpel8_v_lowpass_l2_neon
vld1.64 {d8}, [r1], r3
vst1.64 {d11}, [r0,:64], r3
bx lr
- .endfunc
+endfunc
.endm
h264_qpel_v_lowpass_l2 put
transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
bx lr
- .endfunc
+endfunc
.macro h264_qpel8_hv_lowpass type
function \type\()_h264_qpel8_hv_lowpass_neon
mov lr, r10
bx lr
- .endfunc
+endfunc
.endm
h264_qpel8_hv_lowpass put
mov lr, r10
bx lr
- .endfunc
+endfunc
.endm
h264_qpel8_hv_lowpass_l2 put
sub r1, r1, r3, lsl #2
mov lr, r9
b \type\()_h264_qpel8_hv_lowpass_neon
- .endfunc
+endfunc
function \type\()_h264_qpel16_hv_lowpass_l2_neon
mov r9, lr
sub r1, r1, r3, lsl #2
mov lr, r9
b \type\()_h264_qpel8_hv_lowpass_l2_neon
- .endfunc
+endfunc
.endm
h264_qpel16_hv put
sub r1, r1, #2
mov ip, #8
b \type\()_h264_qpel8_h_lowpass_l2_neon
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc20_neon, export=1
lowpass_const r3
mov r3, r2
mov ip, #8
b \type\()_h264_qpel8_h_lowpass_neon
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc30_neon, export=1
lowpass_const r3
sub r1, r1, #2
mov ip, #8
b \type\()_h264_qpel8_h_lowpass_l2_neon
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc01_neon, export=1
push {lr}
bl \type\()_h264_qpel8_v_lowpass_l2_neon
vpop {d8-d15}
pop {pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc11_neon, export=1
push {r0, r1, r11, lr}
vpop {d8-d15}
add sp, r11, #8
pop {r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc21_neon, export=1
push {r0, r1, r4, r10, r11, lr}
vpop {d8-d15}
add sp, r11, #8
pop {r4, r10, r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc31_neon, export=1
add r1, r1, #1
push {r0, r1, r11, lr}
sub r1, r1, #1
b \type\()_h264_qpel8_mc11
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc02_neon, export=1
push {lr}
bl \type\()_h264_qpel8_v_lowpass_neon
vpop {d8-d15}
pop {pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc12_neon, export=1
push {r0, r1, r4, r10, r11, lr}
vpop {d8-d15}
add sp, r11, #8
pop {r4, r10, r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc22_neon, export=1
push {r4, r10, r11, lr}
vpop {d8-d15}
mov sp, r11
pop {r4, r10, r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc32_neon, export=1
push {r0, r1, r4, r10, r11, lr}
add r1, r1, #1
b \type\()_h264_qpel8_mc12
- .endfunc
+endfunc
/* Trampoline: saves lr, sets ip = r1 + r2, then jumps to the label
 * <type>_h264_qpel8_mc01, which does the rest of the work.
 * NOTE(review): that label is not visible in this excerpt; presumably it
 * sits inside ff_<type>_h264_qpel8_mc01_neon after that function's own
 * `push {lr}`, so the push here must stay in sync with it. */
function ff_\type\()_h264_qpel8_mc03_neon, export=1
push {lr}
add ip, r1, r2
b \type\()_h264_qpel8_mc01
- .endfunc
+endfunc
/* Trampoline: pushes r0, r1, r11 and lr, advances r1 by r2, then jumps to
 * the label <type>_h264_qpel8_mc11, which does the rest of the work.
 * NOTE(review): that label is not visible in this excerpt; the register
 * list pushed here matches the push at the top of
 * ff_<type>_h264_qpel8_mc11_neon and presumably must stay in sync with it. */
function ff_\type\()_h264_qpel8_mc13_neon, export=1
push {r0, r1, r11, lr}
add r1, r1, r2
b \type\()_h264_qpel8_mc11
- .endfunc
+endfunc
/* Trampoline: pushes r0, r1, r4, r10, r11 and lr, advances r1 by r2, then
 * jumps to the label <type>_h264_qpel8_mc21, which does the rest of the work.
 * NOTE(review): that label is not visible in this excerpt; the register
 * list pushed here matches the push at the top of
 * ff_<type>_h264_qpel8_mc21_neon and presumably must stay in sync with it. */
function ff_\type\()_h264_qpel8_mc23_neon, export=1
push {r0, r1, r4, r10, r11, lr}
add r1, r1, r2
b \type\()_h264_qpel8_mc21
- .endfunc
+endfunc
function ff_\type\()_h264_qpel8_mc33_neon, export=1
add r1, r1, #1
add r1, r1, r2
sub r1, r1, #1
b \type\()_h264_qpel8_mc11
- .endfunc
+endfunc
.endm
h264_qpel8 put
mov r3, r1
sub r1, r1, #2
b \type\()_h264_qpel16_h_lowpass_l2_neon
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc20_neon, export=1
lowpass_const r3
sub r1, r1, #2
mov r3, r2
b \type\()_h264_qpel16_h_lowpass_neon
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc30_neon, export=1
lowpass_const r3
add r3, r1, #1
sub r1, r1, #2
b \type\()_h264_qpel16_h_lowpass_l2_neon
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc01_neon, export=1
push {r4, lr}
bl \type\()_h264_qpel16_v_lowpass_l2_neon
vpop {d8-d15}
pop {r4, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc11_neon, export=1
push {r0, r1, r4, r11, lr}
vpop {d8-d15}
add sp, r11, #8
pop {r4, r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc21_neon, export=1
push {r0, r1, r4-r5, r9-r11, lr}
vpop {d8-d15}
add sp, r11, #8
pop {r4-r5, r9-r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc31_neon, export=1
add r1, r1, #1
push {r0, r1, r4, r11, lr}
sub r1, r1, #1
b \type\()_h264_qpel16_mc11
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc02_neon, export=1
push {r4, lr}
bl \type\()_h264_qpel16_v_lowpass_neon
vpop {d8-d15}
pop {r4, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc12_neon, export=1
push {r0, r1, r4-r5, r9-r11, lr}
vpop {d8-d15}
add sp, r11, #8
pop {r4-r5, r9-r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc22_neon, export=1
push {r4, r9-r11, lr}
vpop {d8-d15}
mov sp, r11
pop {r4, r9-r11, pc}
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc32_neon, export=1
push {r0, r1, r4-r5, r9-r11, lr}
add r1, r1, #1
b \type\()_h264_qpel16_mc12
- .endfunc
+endfunc
/* Trampoline: saves r4 and lr, sets ip = r1 + r2, then jumps to the label
 * <type>_h264_qpel16_mc01, which does the rest of the work.
 * NOTE(review): that label is not visible in this excerpt; the register
 * list pushed here matches the push at the top of
 * ff_<type>_h264_qpel16_mc01_neon and presumably must stay in sync with it. */
function ff_\type\()_h264_qpel16_mc03_neon, export=1
push {r4, lr}
add ip, r1, r2
b \type\()_h264_qpel16_mc01
- .endfunc
+endfunc
/* Trampoline: pushes r0, r1, r4, r11 and lr, advances r1 by r2, then jumps
 * to the label <type>_h264_qpel16_mc11, which does the rest of the work.
 * NOTE(review): that label is not visible in this excerpt; the register
 * list pushed here matches the push at the top of
 * ff_<type>_h264_qpel16_mc11_neon and presumably must stay in sync with it. */
function ff_\type\()_h264_qpel16_mc13_neon, export=1
push {r0, r1, r4, r11, lr}
add r1, r1, r2
b \type\()_h264_qpel16_mc11
- .endfunc
+endfunc
/* Trampoline: pushes r0, r1, r4-r5, r9-r11 and lr, advances r1 by r2, then
 * jumps to the label <type>_h264_qpel16_mc21, which does the rest of the work.
 * NOTE(review): that label is not visible in this excerpt; the register
 * list pushed here matches the push at the top of
 * ff_<type>_h264_qpel16_mc21_neon and presumably must stay in sync with it. */
function ff_\type\()_h264_qpel16_mc23_neon, export=1
push {r0, r1, r4-r5, r9-r11, lr}
add r1, r1, r2
b \type\()_h264_qpel16_mc21
- .endfunc
+endfunc
function ff_\type\()_h264_qpel16_mc33_neon, export=1
add r1, r1, #1
add r1, r1, r2
sub r1, r1, #1
b \type\()_h264_qpel16_mc11
- .endfunc
+endfunc
.endm
h264_qpel16 put
biweight_\w vmlsl.u8, vmlsl.u8
40: rsb r5, r5, #0
biweight_\w vmlsl.u8, vmlal.u8
- .endfunc
+endfunc
.endm
.macro biweight_entry w, h, b=1
.if \b
b biweight_h264_pixels_\w\()_neon
.endif
- .endfunc
+endfunc
.endm
biweight_entry 16, 8
weight_\w vadd.s16
10: rsb r3, r3, #0
weight_\w vsub.s16
- .endfunc
+endfunc
.endm
.macro weight_entry w, h, b=1
.if \b
b weight_h264_pixels_\w\()_neon
.endif
- .endfunc
+endfunc
.endm
weight_entry 16, 8
vst1.32 {d1[0]}, [r0,:32], r2
bx lr
- .endfunc
+endfunc
function ff_h264_idct_dc_add_neon, export=1
vld1.16 {d2[],d3[]}, [r1,:16]
vst1.32 {d1[0]}, [r0,:32], r2
vst1.32 {d1[1]}, [r0,:32], r2
bx lr
- .endfunc
+endfunc
function ff_h264_idct_add16_neon, export=1
push {r4-r8,lr}
add r1, r1, #32
bne 1b
pop {r4-r8,pc}
- .endfunc
+endfunc
function ff_h264_idct_add16intra_neon, export=1
push {r4-r8,lr}
add r1, r1, #32
bne 1b
pop {r4-r8,pc}
- .endfunc
+endfunc
function ff_h264_idct_add8_neon, export=1
push {r4-r10,lr}
add r1, r1, #32
bne 1b
pop {r4-r10,pc}
- .endfunc
+endfunc
.section .rodata
scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
/* Exported entry point: sets every byte lane of q0 to 128, then branches to
 * the local label .L_pred16x16_dc_end.
 * NOTE(review): that label is not visible in this excerpt; presumably it is
 * the shared store loop and return used by the other pred16x16 *_dc
 * variants, which branch to the same label. */
function ff_pred16x16_128_dc_neon, export=1
vmov.i8 q0, #128
b .L_pred16x16_dc_end
- .endfunc
+endfunc
function ff_pred16x16_top_dc_neon, export=1
sub r2, r0, r1
vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0]
b .L_pred16x16_dc_end
- .endfunc
+endfunc
function ff_pred16x16_left_dc_neon, export=1
sub r2, r0, #1
vrshrn.u16 d0, q0, #4
vdup.8 q0, d0[0]
b .L_pred16x16_dc_end
- .endfunc
+endfunc
function ff_pred16x16_dc_neon, export=1
sub r2, r0, r1
subs r3, r3, #1
bne 6b
bx lr
- .endfunc
+endfunc
function ff_pred16x16_hor_neon, export=1
sub r2, r0, #1
subs r3, r3, #1
bne 1b
bx lr
- .endfunc
+endfunc
function ff_pred16x16_vert_neon, export=1
sub r0, r0, r1
subs r3, r3, #1
bne 1b
bx lr
- .endfunc
+endfunc
function ff_pred16x16_plane_neon, export=1
sub r3, r0, r1
subs r3, r3, #1
bne 1b
bx lr
- .endfunc
+endfunc
.section .rodata
.align 4
subs r3, r3, #1
bne 1b
bx lr
- .endfunc
+endfunc
function ff_pred8x8_vert_neon, export=1
sub r0, r0, r1
subs r3, r3, #1
bne 1b
bx lr
- .endfunc
+endfunc
function ff_pred8x8_plane_neon, export=1
sub r3, r0, r1
subs r3, r3, #1
bne 1b
bx lr
- .endfunc
+endfunc
/* Exported entry point: sets every byte lane of q0 to 128, then branches to
 * the local label .L_pred8x8_dc_end.
 * NOTE(review): that label is not visible in this excerpt; presumably it is
 * the shared store loop and return used by the other pred8x8 *_dc variants,
 * which branch to the same label. */
function ff_pred8x8_128_dc_neon, export=1
vmov.i8 q0, #128
b .L_pred8x8_dc_end
- .endfunc
+endfunc
function ff_pred8x8_top_dc_neon, export=1
sub r2, r0, r1
vdup.8 d0, d0[0]
vtrn.32 d0, d1
b .L_pred8x8_dc_end
- .endfunc
+endfunc
function ff_pred8x8_left_dc_neon, export=1
sub r2, r0, #1
vdup.8 d1, d0[1]
vdup.8 d0, d0[0]
b .L_pred8x8_dc_end
- .endfunc
+endfunc
function ff_pred8x8_dc_neon, export=1
sub r2, r0, r1
subs r3, r3, #1
bne 6b
bx lr
- .endfunc
+endfunc
function ff_pred8x8_l0t_dc_neon, export=1
sub r2, r0, r1
vdup.8 q2, d3[2]
vtrn.32 q0, q2
b .L_pred8x8_dc_end
- .endfunc
+endfunc
function ff_pred8x8_l00_dc_neon, export=1
sub r2, r0, #1
vmov.i8 d1, #128
vdup.8 d0, d0[0]
b .L_pred8x8_dc_end
- .endfunc
+endfunc
function ff_pred8x8_0lt_dc_neon, export=1
sub r2, r0, r1
vdup.8 d5, d2[5]
vtrn.32 q0, q2
b .L_pred8x8_dc_end
- .endfunc
+endfunc
function ff_pred8x8_0l0_dc_neon, export=1
add r2, r0, r1, lsl #2
vmov.i8 d0, #128
vdup.8 d1, d1[0]
b .L_pred8x8_dc_end
- .endfunc
+endfunc
vpaddl.s32 d3, d2
vmov.32 r0, d3[0]
bx lr
- .endfunc
+endfunc
@ scalarproduct_and_madd_int16(/*aligned*/v0,v1,v2,order,mul)
function ff_scalarproduct_and_madd_int16_neon, export=1
vpaddl.s32 d3, d2
vmov.32 r0, d3[0]
bx lr
- .endfunc
+endfunc
vst2.32 {d5,d7}, [r8,:128]
pop {r4-r8,pc}
-.endfunc
+endfunc
function ff_imdct_calc_neon, export=1
push {r4-r6,lr}
bgt 1b
pop {r4-r6,pc}
-.endfunc
+endfunc
function ff_mdct_calc_neon, export=1
push {r4-r10,lr}
vst2.32 {d5,d7}, [r8,:128]
pop {r4-r10,pc}
-.endfunc
+endfunc
strh r9, [r0], #2
strh lr, [r0], #2
pop {r4-r9,pc}
- .endfunc
+endfunc
strd a3, [a1, #8]
ldr pc, [sp], #4
- .endfunc
+endfunc
.macro idct_col
ldr a4, [a1] /* a4 = col[1:0] */
str a2, [a1, #(16*4)]
ldr pc, [sp], #4
- .endfunc
+endfunc
function idct_col_put_armv5te
str lr, [sp, #-4]!
strh a2, [v2, -lr]
ldr pc, [sp], #4
- .endfunc
+endfunc
function idct_col_add_armv5te
str lr, [sp, #-4]!
strh a2, [v2]
ldr pc, [sp], #4
- .endfunc
+endfunc
function ff_simple_idct_armv5te, export=1
stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, lr}
bl idct_col_armv5te
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
- .endfunc
+endfunc
function ff_simple_idct_add_armv5te, export=1
stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
add sp, sp, #8
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
- .endfunc
+endfunc
function ff_simple_idct_put_armv5te, export=1
stmfd sp!, {a1, a2, v1, v2, v3, v4, v5, v6, v7, fp, lr}
add sp, sp, #8
ldmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp, pc}
- .endfunc
+endfunc
strh r2, [r1, #(16*5)]
strh r2, [r1, #(16*7)]
pop {pc}
- .endfunc
+endfunc
/*
Compute IDCT of single column, read as row.
strh r8, [r1, #(16*7)]
pop {pc}
- .endfunc
+endfunc
/*
Compute IDCT of single column, read as row, store saturated 8-bit.
sub r1, r1, r2, lsl #3
pop {pc}
- .endfunc
+endfunc
/*
Compute IDCT of single column, read as row, add/store saturated 8-bit.
sub r1, r1, r2, lsl #3
pop {pc}
- .endfunc
+endfunc
/*
Compute 8 IDCT row transforms.
add sp, sp, #128
pop {r4-r11, pc}
- .endfunc
+endfunc
/* ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
function ff_simple_idct_add_armv6, export=1
add sp, sp, #(128+8)
pop {r4-r11, pc}
- .endfunc
+endfunc
/* ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
function ff_simple_idct_put_armv6, export=1
add sp, sp, #(128+8)
pop {r4-r11, pc}
- .endfunc
+endfunc
add r3, r3, r1, lsl #1
pld [r3]
pld [r3, r1]
- .endfunc
+endfunc
function idct_row4_neon
vmov.i32 q15, #(1<<(ROW_SHIFT-1))
vst1.64 {d6-d9}, [r2,:128]!
bx lr
- .endfunc
+endfunc
function idct_col4_neon
mov ip, #16
vsubhn.i32 d6, q14, q6
bx lr
- .endfunc
+endfunc
.align 6
vst1.32 {d5[1]}, [r0,:32], r1
bx lr
- .endfunc
+endfunc
.section .rodata
.align 4
bl idct_col4_st8_neon
idct_end
- .endfunc
+endfunc
.align 6
vst1.32 {d5[1]}, [ip,:32], r1
bx lr
- .endfunc
+endfunc
/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
function ff_simple_idct_add_neon, export=1
bl idct_col4_add8_neon
idct_end
- .endfunc
+endfunc
.align 6
vst1.64 {d9}, [r2,:64], ip
bx lr
- .endfunc
+endfunc
/* void ff_simple_idct_neon(DCTELEM *data); */
function ff_simple_idct_neon, export=1
bl idct_col4_st16_neon
idct_end
- .endfunc
+endfunc
vst1.64 {d0}, [ip,:64], r1
vst1.64 {d1}, [ip,:64], r1
bx lr
-.endfunc
+endfunc
function ff_vp3_h_loop_filter_neon, export=1
sub ip, r0, #1
vst1.16 {d0[3]}, [ip], r1
vst1.16 {d1[3]}, [ip], r1
bx lr
-.endfunc
+endfunc
function vp3_idct_start_neon
vadd.s16 q1, q8, q12
vsub.s16 q8, q8, q12
vld1.64 {d28-d31}, [r2,:128]!
-.endfunc
+endfunc
function vp3_idct_core_neon
vmull.s16 q2, d18, xC1S7 // (ip[1] * C1) << 16
vadd.s16 q10, q1, q2 // Ad = (A - C) * C4
vsub.s16 q14, q4, q3 // H = ip[2] * C6 - ip[6] * C2
bx lr
-.endfunc
+endfunc
.macro VP3_IDCT_END type
function vp3_idct_end_\type\()_neon
vswp d23, d30
.endif
bx lr
-.endfunc
+endfunc
.endm
VP3_IDCT_END row
vst1.64 {d24-d27}, [r0,:128]!
vst1.64 {d28-d31}, [r0,:128]!
bx lr
-.endfunc
+endfunc
function ff_vp3_idct_put_neon, export=1
mov ip, lr
vst1.64 {d6}, [r0,:64], r1
vst1.64 {d7}, [r0,:64], r1
bx lr
-.endfunc
+endfunc
function ff_vp3_idct_add_neon, export=1
mov ip, lr
vst1.64 {d6}, [r2,:64], r1
vst1.64 {d7}, [r2,:64], r1
bx lr
-.endfunc
+endfunc