From: Linfeng Zhang Date: Wed, 11 May 2016 19:39:42 +0000 (-0700) Subject: remove mmx variance functions X-Git-Tag: v1.6.0~136^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=d0ffae825d78f23ddb43f039ede525f06428b1c3;p=platform%2Fupstream%2Flibvpx.git remove mmx variance functions there are sse2 equivalents which is a reasonable modern baseline Removed mmx variance functions: vpx_get_mb_ss_mmx() vpx_get8x8var_mmx() vpx_get4x4var_mmx() vpx_variance4x4_mmx() vpx_variance8x8_mmx() vpx_mse16x16_mmx() vpx_variance16x16_mmx() vpx_variance16x8_mmx() vpx_variance8x16_mmx() Change-Id: Iffaf85344c6676a3dd337c0645a2dd5deb2f86a1 --- diff --git a/test/variance_test.cc b/test/variance_test.cc index a9ca07c..a6efc92 100644 --- a/test/variance_test.cc +++ b/test/variance_test.cc @@ -977,20 +977,6 @@ INSTANTIATE_TEST_CASE_P( #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_MMX -INSTANTIATE_TEST_CASE_P(MMX, VpxMseTest, - ::testing::Values(make_tuple(4, 4, &vpx_mse16x16_mmx))); - -INSTANTIATE_TEST_CASE_P(MMX, SumOfSquaresTest, - ::testing::Values(vpx_get_mb_ss_mmx)); - -INSTANTIATE_TEST_CASE_P( - MMX, VpxVarianceTest, - ::testing::Values(make_tuple(4, 4, &vpx_variance16x16_mmx, 0), - make_tuple(4, 3, &vpx_variance16x8_mmx, 0), - make_tuple(3, 4, &vpx_variance8x16_mmx, 0), - make_tuple(3, 3, &vpx_variance8x8_mmx, 0), - make_tuple(2, 2, &vpx_variance4x4_mmx, 0))); - INSTANTIATE_TEST_CASE_P( MMX, VpxSubpelVarianceTest, ::testing::Values(make_tuple(4, 4, &vpx_sub_pixel_variance16x16_mmx, 0), diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index f883ce5..ab5b687 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -1407,16 +1407,16 @@ add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int sourc specialize qw/vpx_variance16x32 sse2 msa/; add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x16 mmx sse2 avx2 media neon msa/; + specialize qw/vpx_variance16x16 sse2 avx2 media neon msa/; add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x8 mmx sse2 neon msa/; + specialize qw/vpx_variance16x8 sse2 neon msa/; add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x16 mmx sse2 neon msa/; + specialize qw/vpx_variance8x16 sse2 neon msa/; add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x8 mmx sse2 media neon msa/; + specialize qw/vpx_variance8x8 sse2 media neon msa/; add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vpx_variance8x4 sse2 msa/; @@ -1425,7 +1425,7 @@ add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int source_ specialize qw/vpx_variance4x8 sse2 msa/; add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x4 mmx sse2 msa/; + specialize qw/vpx_variance4x4 sse2 msa/; # # Specialty Variance @@ -1434,10 +1434,10 @@ add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int source_stride, specialize qw/vpx_get16x16var sse2 avx2 neon msa/; add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get8x8var mmx sse2 neon msa/; + specialize qw/vpx_get8x8var sse2 neon msa/; add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; - specialize qw/vpx_mse16x16 mmx sse2 avx2 media neon msa/; + specialize qw/vpx_mse16x16 sse2 avx2 media neon msa/; add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize qw/vpx_mse16x8 sse2 msa/; @@ -1449,7 +1449,7 @@ add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int source_stri specialize qw/vpx_mse8x8 sse2 msa/; add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; - specialize qw/vpx_get_mb_ss mmx sse2 msa/; + specialize qw/vpx_get_mb_ss sse2 msa/; add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride"; specialize qw/vpx_get4x4sse_cs neon msa/; diff --git a/vpx_dsp/x86/variance_impl_mmx.asm b/vpx_dsp/x86/variance_impl_mmx.asm index b8ba79b..f4de42a 100644 --- a/vpx_dsp/x86/variance_impl_mmx.asm +++ b/vpx_dsp/x86/variance_impl_mmx.asm @@ -13,407 +13,6 @@ %define mmx_filter_shift 7 -;unsigned int vpx_get_mb_ss_mmx( short *src_ptr ) -global sym(vpx_get_mb_ss_mmx) PRIVATE -sym(vpx_get_mb_ss_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 7 - GET_GOT rbx - push rsi - push rdi - sub rsp, 8 - ; end prolog - - mov rax, arg(0) ;src_ptr - mov rcx, 16 - pxor mm4, mm4 - -.NEXTROW: - movq mm0, [rax] - movq mm1, [rax+8] - movq mm2, [rax+16] - movq mm3, [rax+24] - pmaddwd mm0, mm0 - pmaddwd mm1, mm1 - pmaddwd mm2, mm2 - pmaddwd mm3, mm3 - - paddd mm4, mm0 - paddd mm4, mm1 - paddd mm4, mm2 - paddd mm4, mm3 - - add rax, 32 - dec rcx - ja .NEXTROW - movq QWORD PTR [rsp], mm4 - - ;return sum[0]+sum[1]; - movsxd rax, dword ptr [rsp] - movsxd rcx, dword ptr [rsp+4] - add rax, rcx - - ; begin epilog - add rsp, 8 - pop rdi - pop rsi - RESTORE_GOT - UNSHADOW_ARGS - pop rbp - ret - -;void vpx_get8x8var_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *SSE, -; int *Sum -;) -global sym(vpx_get8x8var_mmx) PRIVATE -sym(vpx_get8x8var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - push rbx - sub rsp, 16 - ; end prolog - - pxor mm5, mm5 ; Blank mmx6 - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - - ; Row 1 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm1, [rbx] ; Copy eight bytes to mm1 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 2 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 3 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 4 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 5 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - ; movq mm4, [rbx + rdx] - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 6 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 7 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movq mm1, [rbx] ; Copy eight bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Row 8 - movq mm0, [rax] ; Copy eight bytes to mm0 - movq mm2, mm0 ; Take copies - movq mm3, mm1 ; Take copies - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - punpckhbw mm2, mm6 ; unpack to higher prrcision - punpckhbw mm3, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - psubsw mm2, mm3 ; A-B (high order) to MM2 - - paddw mm5, mm0 ; accumulate differences in mm5 - paddw mm5, mm2 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - pmaddwd mm2, mm2 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - paddd mm7, mm0 ; accumulate in mm7 - paddd mm7, mm2 ; accumulate in mm7 - - ; Now accumulate the final results. - movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory - movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory - movsx rdx, WORD PTR [rsp+8] - movsx rcx, WORD PTR [rsp+10] - movsx rbx, WORD PTR [rsp+12] - movsx rax, WORD PTR [rsp+14] - add rdx, rcx - add rbx, rax - add rdx, rbx ;XSum - movsxd rax, DWORD PTR [rsp] - movsxd rcx, DWORD PTR [rsp+4] - add rax, rcx ;XXSum - mov rsi, arg(4) ;SSE - mov rdi, arg(5) ;Sum - mov dword ptr [rsi], eax - mov dword ptr [rdi], edx - xor rax, rax ; return 0 - - ; begin epilog - add rsp, 16 - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - -;void -;vpx_get4x4var_mmx -;( -; unsigned char *src_ptr, -; int source_stride, -; unsigned char *ref_ptr, -; int recon_stride, -; unsigned int *SSE, -; int *Sum -;) -global sym(vpx_get4x4var_mmx) PRIVATE -sym(vpx_get4x4var_mmx): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 6 - push rsi - push rdi - push rbx - sub rsp, 16 - ; end prolog - - pxor mm5, mm5 ; Blank mmx6 - pxor mm6, mm6 ; Blank mmx7 - pxor mm7, mm7 ; Blank mmx7 - - mov rax, arg(0) ;[src_ptr] ; Load base addresses - mov rbx, arg(2) ;[ref_ptr] - movsxd rcx, dword ptr arg(1) ;[source_stride] - movsxd rdx, dword ptr arg(3) ;[recon_stride] - - ; Row 1 - movd mm0, [rax] ; Copy four bytes to mm0 - movd mm1, [rbx] ; Copy four bytes to mm1 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy four bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 2 - movd mm0, [rax] ; Copy four bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy four bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 3 - movd mm0, [rax] ; Copy four bytes to mm0 - punpcklbw mm0, mm6 ; unpack to higher precision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - add rbx,rdx ; Inc pointer into ref data - add rax,rcx ; Inc pointer into the new data - movd mm1, [rbx] ; Copy four bytes to mm1 - paddd mm7, mm0 ; accumulate in mm7 - - ; Row 4 - movd mm0, [rax] ; Copy four bytes to mm0 - - punpcklbw mm0, mm6 ; unpack to higher prrcision - punpcklbw mm1, mm6 - psubsw mm0, mm1 ; A-B (low order) to MM0 - - paddw mm5, mm0 ; accumulate differences in mm5 - - pmaddwd mm0, mm0 ; square and accumulate - paddd mm7, mm0 ; accumulate in mm7 - - ; Now accumulate the final results. - movq QWORD PTR [rsp+8], mm5 ; copy back accumulated results into normal memory - movq QWORD PTR [rsp], mm7 ; copy back accumulated results into normal memory - movsx rdx, WORD PTR [rsp+8] - movsx rcx, WORD PTR [rsp+10] - movsx rbx, WORD PTR [rsp+12] - movsx rax, WORD PTR [rsp+14] - add rdx, rcx - add rbx, rax - add rdx, rbx ;XSum - movsxd rax, DWORD PTR [rsp] - movsxd rcx, DWORD PTR [rsp+4] - add rax, rcx ;XXSum - mov rsi, arg(4) ;SSE - mov rdi, arg(5) ;Sum - mov dword ptr [rsi], eax - mov dword ptr [rdi], edx - xor rax, rax ; return 0 - - ; begin epilog - add rsp, 16 - pop rbx - pop rdi - pop rsi - UNSHADOW_ARGS - pop rbp - ret - ;void vpx_filter_block2d_bil4x4_var_mmx ;( ; unsigned char *ref_ptr, diff --git a/vpx_dsp/x86/variance_mmx.c b/vpx_dsp/x86/variance_mmx.c index f04f4e2..636231d 100644 --- a/vpx_dsp/x86/variance_mmx.c +++ b/vpx_dsp/x86/variance_mmx.c @@ -23,10 +23,6 @@ DECLARE_ALIGNED(16, static const int16_t, bilinear_filters_mmx[8][8]) = { { 16, 16, 16, 16, 112, 112, 112, 112 } }; -extern void vpx_get4x4var_mmx(const uint8_t *a, int a_stride, - const uint8_t *b, int b_stride, - unsigned int *sse, int *sum); - extern void vpx_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr, int ref_pixels_per_line, const unsigned char *src_ptr, @@ -47,98 +43,6 @@ extern void vpx_filter_block2d_bil_var_mmx(const unsigned char *ref_ptr, unsigned int *sumsquared); -unsigned int vpx_variance4x4_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vpx_get4x4var_mmx(a, a_stride, b, b_stride, &var, &avg); - *sse = var; - return (var - (((unsigned int)avg * avg) >> 4)); -} - -unsigned int vpx_variance8x8_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &var, &avg); - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 6)); -} - -unsigned int vpx_mse16x16_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); - vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, - b + 8 * b_stride, b_stride, &sse2, &sum2); - vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, - b + 8 * b_stride + 8, b_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - *sse = var; - return var; -} - -unsigned int vpx_variance16x16_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, sse2, sse3, var; - int sum0, sum1, sum2, sum3, avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); - vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, - b + 8 * b_stride, b_stride, &sse2, &sum2); - vpx_get8x8var_mmx(a + 8 * a_stride + 8, a_stride, - b + 8 * b_stride + 8, b_stride, &sse3, &sum3); - - var = sse0 + sse1 + sse2 + sse3; - avg = sum0 + sum1 + sum2 + sum3; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 8)); -} - -unsigned int vpx_variance16x8_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8, a_stride, b + 8, b_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - return (var - (((unsigned int)avg * avg) >> 7)); -} - -unsigned int vpx_variance8x16_mmx(const unsigned char *a, int a_stride, - const unsigned char *b, int b_stride, - unsigned int *sse) { - unsigned int sse0, sse1, var; - int sum0, sum1, avg; - - vpx_get8x8var_mmx(a, a_stride, b, b_stride, &sse0, &sum0); - vpx_get8x8var_mmx(a + 8 * a_stride, a_stride, - b + 8 * b_stride, b_stride, &sse1, &sum1); - - var = sse0 + sse1; - avg = sum0 + sum1; - *sse = var; - - return (var - (((unsigned int)avg * avg) >> 7)); -} - uint32_t vpx_sub_pixel_variance4x4_mmx(const uint8_t *a, int a_stride, int xoffset, int yoffset, const uint8_t *b, int b_stride,