From 7b8e7f0f3ae13ebf29200324b0c4d7fe64780a58 Mon Sep 17 00:00:00 2001
From: Yunqing Wang
Date: Wed, 9 Mar 2011 11:16:30 -0500
Subject: [PATCH] Add vp8_sub_pixel_variance16x8_ssse3 function

Added SSSE3 function

Change-Id: I8c304c92458618d93fda3a2f62bd09ccb63e75ad
---
Review notes (commentary only: text between the three-dash line above and
the first "diff --git" line is neither commit message nor patch content).

* variance_ssse3.c adds vp8_sub_pixel_variance16x8_ssse3(). It picks a
  helper from the (xoffset, yoffset) pair:
      (4, 0) -> vp8_half_horiz_variance16x_h_sse2
      (0, 4) -> vp8_half_vert_variance16x_h_sse2
      (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
      other  -> vp8_filter_block2d_bil_var_ssse3 (also given xoffset, yoffset)
  Every call passes the literal 8 right after dst_pixels_per_line
  (presumably the block height; the helper prototypes are not shown in this
  patch, so confirm there) and receives its results in xsum0 / xxsum0.
  The function then stores xxsum0 in *sse and returns
  xxsum0 - ((xsum0 * xsum0) >> 7). The shift by 7 is a division by 128,
  i.e. 16 * 8; the 16x16 function shown as context shifts by 8.
* variance_ssse3.c also drops the xsum1 / xxsum1 locals from
  vp8_sub_pixel_variance16x16_ssse3(). Presumably they were unused; the
  middle of that function is not visible in this patch.
* variance_x86.h declares the new function under "#if HAVE_SSSE3" and adds
  an #undef/#define pair mapping vp8_variance_subpixvar16x8 to it
  (apparently inside the "#if !CONFIG_RUNTIME_CPU_DETECT" section that
  starts a few lines earlier; verify in the full header).
* x86_csystemdependent.c assigns the new function to
  cpi->rtcd.variance.subpixvar16x8 inside vp8_arch_x86_encoder_init().
* NOTE(review): indentation and column alignment in this copy were
  reconstructed from a whitespace-collapsed original, so context-line
  whitespace may not match the tree byte for byte.

 vp8/encoder/x86/variance_ssse3.c | 53 ++++++++++++++++++++++++++++++++--
 vp8/encoder/x86/variance_x86.h | 4 +++
 vp8/encoder/x86/x86_csystemdependent.c | 1 +
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/vp8/encoder/x86/variance_ssse3.c b/vp8/encoder/x86/variance_ssse3.c
index d50ae3a..eb5d486 100644
--- a/vp8/encoder/x86/variance_ssse3.c
+++ b/vp8/encoder/x86/variance_ssse3.c
@@ -76,8 +76,8 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
     unsigned int *sse
 )
 {
-    int xsum0, xsum1;
-    unsigned int xxsum0, xxsum1;
+    int xsum0;
+    unsigned int xxsum0;
 
     // note we could avoid these if statements if the calling function
     // just called the appropriate functions inside.
@@ -114,3 +114,52 @@ unsigned int vp8_sub_pixel_variance16x16_ssse3
     *sse = xxsum0;
     return (xxsum0 - ((xsum0 * xsum0) >> 8));
 }
+
+unsigned int vp8_sub_pixel_variance16x8_ssse3
+(
+    const unsigned char *src_ptr,
+    int src_pixels_per_line,
+    int xoffset,
+    int yoffset,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse
+
+)
+{
+    int xsum0;
+    unsigned int xxsum0;
+
+    if (xoffset == 4 && yoffset == 0)
+    {
+        vp8_half_horiz_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else if (xoffset == 0 && yoffset == 4)
+    {
+        vp8_half_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else if (xoffset == 4 && yoffset == 4)
+    {
+        vp8_half_horiz_vert_variance16x_h_sse2(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            &xsum0, &xxsum0);
+    }
+    else
+    {
+        vp8_filter_block2d_bil_var_ssse3(
+            src_ptr, src_pixels_per_line,
+            dst_ptr, dst_pixels_per_line, 8,
+            xoffset, yoffset,
+            &xsum0, &xxsum0);
+    }
+
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 7));
+}
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 1e2fb34..3560f74 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -286,6 +286,7 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
 #if HAVE_SSSE3
 extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3);
 extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
+extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
@@ -295,6 +296,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3);
 #undef vp8_variance_sad16x8x3
 #define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3
 
+#undef vp8_variance_subpixvar16x8
+#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3
+
 #undef vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3
 
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index c7639a7..3158ac1 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -334,6 +334,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3;
         cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3;
 
+        cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3;
         cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3;
 
         cpi->rtcd.quantize.fastquantb = vp8_fast_quantize_b_ssse3;
-- 
2.7.4