;void vp8_intra_pred_uv_dc_mmx2(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_uv_dc_mmx2)
sym(vp8_intra_pred_uv_dc_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
; from left
- dec rsi
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
- movzx ecx, byte [rsi+rax]
+ movzx ecx, byte [rsi]
+ movzx edx, byte [rsi+rax*1]
+ add ecx, edx
movzx edx, byte [rsi+rax*2]
add ecx, edx
+
+
movzx edx, byte [rsi+rdi]
- add ecx, edx
lea rsi, [rsi+rax*4]
+ add ecx, edx
movzx edx, byte [rsi]
add ecx, edx
movzx edx, byte [rsi+rax]
add ecx, edx
movzx edx, byte [rsi+rdi]
add ecx, edx
- movzx edx, byte [rsi+rax*4]
- add ecx, edx
; add up
pextrw edx, mm1, 0x0
;void vp8_intra_pred_uv_dctop_mmx2(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_uv_dctop_mmx2)
sym(vp8_intra_pred_uv_dctop_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; end prolog
+ ;arg(3), arg(4) not used
+
; from top
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above;
pxor mm0, mm0
movq mm1, [rsi]
psadbw mm1, mm0
;void vp8_intra_pred_uv_dcleft_mmx2(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_uv_dcleft_mmx2)
sym(vp8_intra_pred_uv_dcleft_mmx2):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
; end prolog
+ ;arg(2) not used
+
; from left
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- dec rsi
+ mov rsi, arg(3) ;left;
+ movsxd rax, dword ptr arg(4) ;left_stride;
lea rdi, [rax*3]
movzx ecx, byte [rsi]
movzx edx, byte [rsi+rax]
;void vp8_intra_pred_uv_dc128_mmx(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_uv_dc128_mmx)
sym(vp8_intra_pred_uv_dc128_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
; end prolog
+ ;arg(2), arg(3), arg(4) not used
+
; write out
movq mm1, [GLOBAL(dc_128)]
mov rax, arg(0) ;dst;
;void vp8_intra_pred_uv_tm_sse2(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
%macro vp8_intra_pred_uv_tm 1
global sym(vp8_intra_pred_uv_tm_%1)
sym(vp8_intra_pred_uv_tm_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
; read top row
mov edx, 4
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
- sub rsi, rax
+ mov rsi, arg(2) ;above
+ movsxd rax, dword ptr arg(4) ;left_stride;
pxor xmm0, xmm0
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_1024)]
-; set up left ptrs ans subtract topleft
+; set up left ptrs and subtract topleft
movd xmm3, [rsi-1]
- lea rsi, [rsi+rax-1]
+ mov rsi, arg(3) ;left;
%ifidn %1, sse2
punpcklbw xmm3, xmm0
pshuflw xmm3, xmm3, 0x0
;void vp8_intra_pred_uv_ve_mmx(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
global sym(vp8_intra_pred_uv_ve_mmx)
sym(vp8_intra_pred_uv_ve_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
; end prolog
+ ; arg(3), arg(4) not used
+
; read from top
- mov rax, arg(2) ;src;
+ mov rax, arg(2) ;above;
- movsxd rdx, dword ptr arg(3) ;src_stride;
- sub rax, rdx
+
movq mm1, [rax]
; write out
;void vp8_intra_pred_uv_ho_mmx2(
; unsigned char *dst,
-; int dst_stride
-; unsigned char *src,
-; int src_stride,
+; int dst_stride,
+; unsigned char *above,
+; unsigned char *left,
+; int left_stride
; )
%macro vp8_intra_pred_uv_ho 1
global sym(vp8_intra_pred_uv_ho_%1)
sym(vp8_intra_pred_uv_ho_%1):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 4
+ SHADOW_ARGS_TO_STACK 5
push rsi
push rdi
%ifidn %1, ssse3
%endif
; end prolog
+ ;arg(2) not used
+
; read from left and write out
%ifidn %1, mmx2
mov edx, 4
%endif
- mov rsi, arg(2) ;src;
- movsxd rax, dword ptr arg(3) ;src_stride;
+ mov rsi, arg(3) ;left
+ movsxd rax, dword ptr arg(4) ;left_stride;
mov rdi, arg(0) ;dst;
movsxd rcx, dword ptr arg(1) ;dst_stride
%ifidn %1, ssse3
movdqa xmm2, [GLOBAL(dc_00001111)]
lea rbx, [rax*3]
%endif
- dec rsi
+
%ifidn %1, mmx2
.vp8_intra_pred_uv_ho_%1_loop:
movd mm0, [rsi]
#define build_intra_predictors_mbuv_prototype(sym) \
void sym(unsigned char *dst, int dst_stride, \
- const unsigned char *src, int src_stride)
+ const unsigned char *above, \
+ const unsigned char *left, int left_stride)
typedef build_intra_predictors_mbuv_prototype((*build_intra_predictors_mbuv_fn_t));
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_dc_mmx2);
extern build_intra_predictors_mbuv_prototype(vp8_intra_pred_uv_tm_ssse3);
static void vp8_build_intra_predictors_mbuv_x86(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
unsigned char *dst_u,
unsigned char *dst_v,
int dst_stride,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
build_intra_predictors_mbuv_fn_t tm_func,
build_intra_predictors_mbuv_fn_t ho_func)
{
int mode = x->mode_info_context->mbmi.uv_mode;
build_intra_predictors_mbuv_fn_t fn;
- int src_stride = x->dst.uv_stride;
switch (mode) {
case V_PRED: fn = vp8_intra_pred_uv_ve_mmx; break;
default: return;
}
- fn(dst_u, dst_stride, x->dst.u_buffer, src_stride);
- fn(dst_v, dst_stride, x->dst.v_buffer, src_stride);
+ fn(dst_u, dst_stride, uabove_row, uleft, left_stride);
+ fn(dst_v, dst_stride, vabove_row, vleft, left_stride);
}
-void vp8_build_intra_predictors_mbuv_sse2(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
+ unsigned char * upred_ptr,
+ unsigned char * vpred_ptr,
+ int pred_stride)
{
- vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
- &x->predictor[320], 8,
+ vp8_build_intra_predictors_mbuv_x86(x,
+ uabove_row, vabove_row,
+ upred_ptr,
+ vpred_ptr, pred_stride,
+ uleft,
+ vleft,
+ left_stride,
vp8_intra_pred_uv_tm_sse2,
vp8_intra_pred_uv_ho_mmx2);
}
-void vp8_build_intra_predictors_mbuv_ssse3(MACROBLOCKD *x)
+void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x,
+ unsigned char * uabove_row,
+ unsigned char * vabove_row,
+ unsigned char * uleft,
+ unsigned char * vleft,
+ int left_stride,
+ unsigned char * upred_ptr,
+ unsigned char * vpred_ptr,
+ int pred_stride)
{
- vp8_build_intra_predictors_mbuv_x86(x, &x->predictor[256],
- &x->predictor[320], 8,
- vp8_intra_pred_uv_tm_ssse3,
- vp8_intra_pred_uv_ho_ssse3);
-}
-
-void vp8_build_intra_predictors_mbuv_s_sse2(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
- x->dst.v_buffer, x->dst.uv_stride,
- vp8_intra_pred_uv_tm_sse2,
- vp8_intra_pred_uv_ho_mmx2);
-}
-
-void vp8_build_intra_predictors_mbuv_s_ssse3(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mbuv_x86(x, x->dst.u_buffer,
- x->dst.v_buffer, x->dst.uv_stride,
+ vp8_build_intra_predictors_mbuv_x86(x,
+ uabove_row, vabove_row,
+ upred_ptr,
+ vpred_ptr, pred_stride,
+ uleft,
+ vleft,
+ left_stride,
vp8_intra_pred_uv_tm_ssse3,
vp8_intra_pred_uv_ho_ssse3);
}
default: return;
}
- fn(dst_y, dst_stride, x->dst.y_buffer, src_stride);
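+// NOTE(review): the predictor call below is commented out, so this path no
+// longer invokes fn -- confirm this is intentional, or port the call to the
+// new (above, left, left_stride) signature before merging.
+// fn(dst_y, dst_stride, x->dst.y_buffer, src_stride);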
return;
}
-void vp8_build_intra_predictors_mby_sse2(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
- vp8_intra_pred_y_tm_sse2);
-}
-
-void vp8_build_intra_predictors_mby_ssse3(MACROBLOCKD *x)
-{
- vp8_build_intra_predictors_mby_x86(x, x->predictor, 16,
- vp8_intra_pred_y_tm_ssse3);
-}
-
void vp8_build_intra_predictors_mby_s_sse2(MACROBLOCKD *x)
{
vp8_build_intra_predictors_mby_x86(x, x->dst.y_buffer, x->dst.y_stride,