From a0ae3682aa67f882006c604196f7ee83eff88d84 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Wed, 27 Oct 2010 11:28:43 -0400 Subject: [PATCH] Fix half-pixel variance RTCD functions This patch fixes the system-dependent entries for the half-pixel variance functions in both the RTCD and non-RTCD cases: - The generic C versions of these functions are now correct. Previously, all three cases called the hv code. - Wire up the ARM functions in RTCD mode. - Created stubs for x86 to call the optimized subpixel functions with the correct parameters, rather than falling back to C code. Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184 --- vp8/encoder/arm/arm_csystemdependent.c | 3 ++ vp8/encoder/generic/csystemdependent.c | 3 ++ vp8/encoder/variance.h | 6 +-- vp8/encoder/variance_c.c | 26 ++++++++++- vp8/encoder/x86/variance_mmx.c | 36 +++++++++++++++ vp8/encoder/x86/variance_sse2.c | 81 ++++++++++++++++++++++++++++++++++ vp8/encoder/x86/variance_x86.h | 24 ++++++++++ vp8/encoder/x86/x86_csystemdependent.c | 6 +++ 8 files changed, 181 insertions(+), 4 deletions(-) diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c index 8736fcf..a1f1102 100644 --- a/vp8/encoder/arm/arm_csystemdependent.c +++ b/vp8/encoder/arm/arm_csystemdependent.c @@ -93,6 +93,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi) /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/ cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon; /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/ diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 
520b08f..ae22b27 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -57,6 +57,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index f60038f..5b70fe5 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -220,17 +220,17 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8); extern prototype_subpixvariance(vp8_variance_subpixvar16x16); #ifndef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c #endif extern prototype_variance(vp8_variance_halfpixvar16x16_h); #ifndef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c #endif extern prototype_variance(vp8_variance_halfpixvar16x16_v); #ifndef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c #endif extern prototype_variance(vp8_variance_halfpixvar16x16_hv); diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c index 48d5bb5..95ec96c 100644 --- a/vp8/encoder/variance_c.c +++ b/vp8/encoder/variance_c.c @@ -461,7 +461,31 @@ unsigned int vp8_sub_pixel_variance16x16_c } -unsigned int 
vp8_half_pixel_variance16x16_c( +unsigned int vp8_variance_halfpixvar16x16_h_c( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_c( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_c( const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c index a5a89d6..2df73a6 100644 --- a/vp8/encoder/x86/variance_mmx.c +++ b/vp8/encoder/x86/variance_mmx.c @@ -595,3 +595,39 @@ unsigned int vp8_i_sub_pixel_variance8x16_mmx *sse = xxsum0; return (xxsum0 - ((xsum0 * xsum0) >> 7)); } + + +unsigned int vp8_variance_halfpixvar16x16_h_mmx( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_mmx( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4, + ref_ptr, recon_stride, sse); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_mmx( + const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int recon_stride, + unsigned int *sse) +{ + return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4, + ref_ptr, recon_stride, sse); +} diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c index 
fb0bac1..006e0a2 100644 --- a/vp8/encoder/x86/variance_sse2.c +++ b/vp8/encoder/x86/variance_sse2.c @@ -513,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse); } + + +unsigned int vp8_variance_halfpixvar16x16_h_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_horiz_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_horiz_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} + + +unsigned int vp8_variance_halfpixvar16x16_v_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr + 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} + + +unsigned int vp8_variance_halfpixvar16x16_hv_wmt( + const unsigned char *src_ptr, + int src_pixels_per_line, + const unsigned char *dst_ptr, + int dst_pixels_per_line, + unsigned int *sse) +{ + int xsum0, xsum1; + unsigned int xxsum0, xxsum1; + + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr, src_pixels_per_line, + dst_ptr, dst_pixels_per_line, 16, + &xsum0, &xxsum0); + + vp8_half_horiz_vert_variance16x_h_sse2( + src_ptr + 8, src_pixels_per_line, + dst_ptr 
+ 8, dst_pixels_per_line, 16, + &xsum1, &xxsum1); + + xsum0 += xsum1; + xxsum0 += xxsum1; + *sse = xxsum0; + return (xxsum0 - ((xsum0 * xsum0) >> 8)); +} diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h index 3c9f9c7..2b62b5f 100644 --- a/vp8/encoder/x86/variance_x86.h +++ b/vp8/encoder/x86/variance_x86.h @@ -35,6 +35,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx); extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx); extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx); +extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx); extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx); extern prototype_getmbss(vp8_get_mb_ss_mmx); extern prototype_variance(vp8_mse16x16_mmx); @@ -89,6 +92,15 @@ extern prototype_sad(vp8_get4x4sse_cs_mmx); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx + #undef vp8_variance_subpixmse16x16 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx @@ -130,6 +142,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt); extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt); extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt); extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt); +extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt); +extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt); +extern 
prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt); extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt); extern prototype_getmbss(vp8_get_mb_ss_sse2); extern prototype_variance(vp8_mse16x16_wmt); @@ -183,6 +198,15 @@ extern prototype_variance2(vp8_get16x16var_sse2); #undef vp8_variance_subpixvar16x16 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt +#undef vp8_variance_halfpixvar16x16_h +#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt + +#undef vp8_variance_halfpixvar16x16_v +#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt + +#undef vp8_variance_halfpixvar16x16_hv +#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt + #undef vp8_variance_subpixmse16x16 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 3e5a8ab..2581c33 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -218,6 +218,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx; + cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; @@ -274,6 +277,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt; cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt; cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt; + cpi->rtcd.variance.halfpixvar16x16_h = 
vp8_variance_halfpixvar16x16_h_wmt; + cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; + cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt; cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; -- 2.7.4