From: Jim Bankoski Date: Mon, 22 Oct 2012 03:47:57 +0000 (-0700) Subject: Remove variance vtable from rtcd X-Git-Tag: v1.3.0~1217^2~206^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=118b2fe962e7a93a7989244d61dbd68354984c9e;p=platform%2Fupstream%2Flibvpx.git Remove variance vtable from rtcd Change-Id: Idd2722a538423b451e1e3495f89a7141480493d6 --- diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh index ef272df..35d72b5 100644 --- a/vp8/common/rtcd_defs.sh +++ b/vp8/common/rtcd_defs.sh @@ -174,3 +174,201 @@ vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2 vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6 vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon +# +# Encoder functions below this point. +# +if [ "$CONFIG_VP8_ENCODER" = "yes" ]; then + + +# variance +[ $arch = "x86_64" ] && mmx_x86_64=mmx && sse2_x86_64=sse2 + +prototype unsigned int vp8_variance32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance32x32 + +prototype unsigned int vp8_variance16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance16x16 mmx sse2 +vp8_variance16x16_sse2=vp8_variance16x16_wmt +vp8_variance16x16_mmx=vp8_variance16x16_mmx + +prototype unsigned int vp8_variance16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance16x8 mmx sse2 +vp8_variance16x8_sse2=vp8_variance16x8_wmt +vp8_variance16x8_mmx=vp8_variance16x8_mmx + +prototype unsigned int vp8_variance8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance8x16 mmx sse2 +vp8_variance8x16_sse2=vp8_variance8x16_wmt +vp8_variance8x16_mmx=vp8_variance8x16_mmx + +prototype unsigned int vp8_variance8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance8x8 mmx sse2 +vp8_variance8x8_sse2=vp8_variance8x8_wmt +vp8_variance8x8_mmx=vp8_variance8x8_mmx + +prototype unsigned int vp8_variance4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance4x4 mmx sse2 +vp8_variance4x4_sse2=vp8_variance4x4_wmt +vp8_variance4x4_mmx=vp8_variance4x4_mmx + +prototype unsigned int vp8_sub_pixel_variance32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance32x32 + +prototype unsigned int vp8_sub_pixel_variance16x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance16x16 sse2 mmx ssse3 +vp8_sub_pixel_variance16x16_sse2=vp8_sub_pixel_variance16x16_wmt + +prototype unsigned int vp8_sub_pixel_variance8x16 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance8x16 sse2 mmx +vp8_sub_pixel_variance8x16_sse2=vp8_sub_pixel_variance8x16_wmt + +prototype unsigned int vp8_sub_pixel_variance16x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance16x8 sse2 mmx ssse3 +vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_ssse3; +vp8_sub_pixel_variance16x8_sse2=vp8_sub_pixel_variance16x8_wmt + +prototype unsigned int vp8_sub_pixel_variance8x8 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance8x8 sse2 mmx +vp8_sub_pixel_variance8x8_sse2=vp8_sub_pixel_variance8x8_wmt + +prototype unsigned int vp8_sub_pixel_variance4x4 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_variance4x4 sse2 mmx +vp8_sub_pixel_variance4x4_sse2=vp8_sub_pixel_variance4x4_wmt + +prototype unsigned int vp8_sad32x32 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad32x32 + +prototype unsigned int vp8_sad16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad16x16 mmx sse2 sse3 +vp8_sad16x16_sse2=vp8_sad16x16_wmt + +prototype unsigned int vp8_sad16x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad16x8 mmx sse2 +vp8_sad16x8_sse2=vp8_sad16x8_wmt + +prototype unsigned int vp8_sad8x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad8x16 mmx sse2 +vp8_sad8x16_sse2=vp8_sad8x16_wmt + +prototype unsigned int vp8_sad8x8 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad8x8 mmx sse2 +vp8_sad8x8_sse2=vp8_sad8x8_wmt + +prototype unsigned int vp8_sad4x4 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp8_sad4x4 mmx sse2 +vp8_sad4x4_sse2=vp8_sad4x4_wmt + +prototype unsigned int vp8_variance_halfpixvar16x16_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar16x16_h mmx sse2 +vp8_variance_halfpixvar16x16_h_sse2=vp8_variance_halfpixvar16x16_h_wmt + +prototype unsigned int vp8_variance_halfpixvar16x16_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar16x16_v mmx sse2 +vp8_variance_halfpixvar16x16_v_sse2=vp8_variance_halfpixvar16x16_v_wmt + +prototype unsigned int vp8_variance_halfpixvar16x16_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar16x16_hv mmx sse2 +vp8_variance_halfpixvar16x16_hv_sse2=vp8_variance_halfpixvar16x16_hv_wmt + +prototype unsigned int vp8_variance_halfpixvar32x32_h "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar32x32_h + +prototype unsigned int vp8_variance_halfpixvar32x32_v "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar32x32_v + +prototype unsigned int vp8_variance_halfpixvar32x32_hv "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp8_variance_halfpixvar32x32_hv + +prototype void vp8_sad32x32x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad32x32x3 + +prototype void vp8_sad16x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x16x3 sse3 ssse3 + +prototype void vp8_sad16x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x8x3 sse3 ssse3 + +prototype void vp8_sad8x16x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x16x3 sse3 + +prototype void vp8_sad8x8x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x8x3 sse3 + +prototype void vp8_sad4x4x3 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp8_sad4x4x3 sse3 + +prototype void vp8_sad32x32x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad32x32x8 + +prototype void vp8_sad16x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad16x16x8 sse4 + +prototype void vp8_sad16x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad16x8x8 sse4 + +prototype void vp8_sad8x16x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad8x16x8 sse4 + +prototype void vp8_sad8x8x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad8x8x8 sse4 + +prototype void vp8_sad4x4x8 "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned short *sad_array" +specialize vp8_sad4x4x8 sse4 + +prototype void vp8_sad32x32x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad32x32x4d + +prototype void vp8_sad16x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x16x4d sse3 + +prototype void vp8_sad16x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad16x8x4d sse3 + +prototype void vp8_sad8x16x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x16x4d sse3 + +prototype void vp8_sad8x8x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad8x8x4d sse3 + +prototype void vp8_sad4x4x4d "const unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr[], int ref_stride, unsigned int *sad_array" +specialize vp8_sad4x4x4d sse3 + +# +# Block copy +# +case $arch in + x86*) + prototype void vp8_copy32xn "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int ref_stride, int n" + specialize vp8_copy32xn sse2 sse3 + ;; +esac + +prototype unsigned int vp8_sub_pixel_mse16x16 "const unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, const unsigned char *dst_ptr, int dst_pixels_per_line, unsigned int *sse" +specialize vp8_sub_pixel_mse16x16 sse2 mmx +vp8_sub_pixel_mse16x16_sse2=vp8_sub_pixel_mse16x16_wmt + +prototype unsigned int vp8_mse16x16 "const unsigned char *src_ptr, int source_stride, const unsigned char *ref_ptr, int recon_stride, unsigned int *sse" +specialize vp8_mse16x16 mmx sse2 +vp8_mse16x16_sse2=vp8_mse16x16_wmt + +prototype unsigned int vp8_sub_pixel_mse32x32 "const unsigned char *src_ptr, int source_stride, int xoffset, int yoffset, const unsigned char *ref_ptr, int Refstride, unsigned int *sse" +specialize vp8_sub_pixel_mse32x32 + +prototype unsigned int vp8_get_mb_ss "const short *" +specialize vp8_get_mb_ss mmx sse2 + +# +# Structured Similarity (SSIM) +# +if [ "$CONFIG_INTERNAL_STATS" = "yes" ]; then + [ $arch = "x86_64" ] && sse2_on_x86_64=sse2 + + prototype void vp8_ssim_parms_8x8 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" + specialize vp8_ssim_parms_8x8 $sse2_on_x86_64 + + prototype void vp8_ssim_parms_16x16 "unsigned char *s, int sp, unsigned char *r, int rp, unsigned long *sum_s, unsigned long *sum_r, unsigned long *sum_sq_s, unsigned long *sum_sq_r, unsigned long *sum_sxr" + specialize vp8_ssim_parms_16x16 $sse2_on_x86_64 +fi + +fi +# end encoder functions diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c index 8a2c561..6824409 100644 --- a/vp8/encoder/encodeframe.c +++ b/vp8/encoder/encodeframe.c @@ -120,8 +120,8 @@ static unsigned int tt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x) { * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). */ - act = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16)(x->src.y_buffer, - x->src.y_stride, VP8_VAR_OFFS, 0, &sse); + act = vp8_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0, + &sse); act = act << 4; /* If the region is flat, lower the activity some more. */ diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 703a101..e2b92c9 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -48,7 +48,7 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { } } - intra_pred_var = VARIANCE_INVOKE(&cpi->rtcd.variance, getmbss)(x->src_diff); + intra_pred_var = vp8_get_mb_ss(x->src_diff); return intra_pred_var; } diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 95f6d97..1e88454 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -364,7 +364,8 @@ static void zz_motion_search(VP8_COMP *cpi, MACROBLOCK *x, YV12_BUFFER_CONFIG *r ref_ptr = (unsigned char *)(*(d->base_pre) + d->pre); - VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16)(src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); + vp8_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, + (unsigned int *)(best_motion_err)); } static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, @@ -387,7 +388,7 @@ static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int new_mv_mode_penalty = 256; // override the default variance function to use MSE - v_fn_ptr.vf = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16); + v_fn_ptr.vf = vp8_mse16x16; // Set up pointers for this macro block recon buffer xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c index 356e32c..44e83fd 100644 --- a/vp8/encoder/generic/csystemdependent.c +++ b/vp8/encoder/generic/csystemdependent.c @@ -23,80 +23,6 @@ extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER void vp8_cmachine_specific_config(VP8_COMP *cpi) { #if CONFIG_RUNTIME_CPU_DETECT cpi->rtcd.common = &cpi->common.rtcd; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32 = vp8_sad32x32_c; -#endif - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_c; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_c; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_c; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_c; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_c; - -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32x3 = vp8_sad32x32x3_c; -#endif - cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_c; - cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_c; - cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_c; - cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_c; - cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_c; - -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32x8 = vp8_sad32x32x8_c; -#endif - cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_c; - cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_c; - cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_c; - cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_c; - cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_c; - -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.sad32x32x4d = vp8_sad32x32x4d_c; -#endif - cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_c; - cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_c; - cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_c; - cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_c; - cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_c; -#if ARCH_X86 || ARCH_X86_64 - cpi->rtcd.variance.copy32xn = vp8_copy32xn_c; -#endif - cpi->rtcd.variance.var4x4 = vp8_variance4x4_c; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_c; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_c; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_c; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.var32x32 = vp8_variance32x32_c; -#endif - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_c; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_c; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.subpixvar32x32 = vp8_sub_pixel_variance32x32_c; -#endif - cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.halfpixvar32x32_h = vp8_variance_halfpixvar32x32_h_c; -#endif - cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.halfpixvar32x32_v = vp8_variance_halfpixvar32x32_v_c; -#endif - cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.halfpixvar32x32_hv = vp8_variance_halfpixvar32x32_hv_c; -#endif - cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c; -#if CONFIG_SUPERBLOCKS - cpi->rtcd.variance.subpixmse32x32 = vp8_sub_pixel_mse32x32_c; -#endif - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c; cpi->rtcd.fdct.short8x8 = vp8_short_fdct8x8_c; cpi->rtcd.fdct.short16x16 = vp8_short_fdct16x16_c; @@ -118,16 +44,11 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi) { cpi->rtcd.search.refining_search = vp8_refining_search_sad; cpi->rtcd.search.diamond_search = vp8_diamond_search_sad; cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_c; - cpi->rtcd.variance.satd16x16 = vp8_satd16x16_c; cpi->rtcd.fdct.short4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.short8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.fast4x4 = vp8_short_fdct4x4_c; cpi->rtcd.fdct.fast8x4 = vp8_short_fdct8x4_c; cpi->rtcd.fdct.walsh_short4x4 = vp8_short_walsh4x4_c; -#if CONFIG_INTERNAL_STATS - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_c; - cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_c; -#endif #endif vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame; diff --git a/vp8/encoder/mbgraph.c b/vp8/encoder/mbgraph.c index 180ee58..2eecfcd 100644 --- a/vp8/encoder/mbgraph.c +++ b/vp8/encoder/mbgraph.c @@ -83,10 +83,8 @@ static unsigned int do_16x16_motion_iteration vp8_set_mbmode_and_mvs(x, NEWMV, dst_mv); vp8_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - best_err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (xd->dst.y_buffer, xd->dst.y_stride, - xd->predictor, 16, INT_MAX); + best_err = vp8_sad16x16(xd->dst.y_buffer, xd->dst.y_stride, + xd->predictor, 16, INT_MAX); /* restore UMV window */ x->mv_col_min = tmp_col_min; @@ -130,11 +128,8 @@ static int do_16x16_motion_search // FIXME should really use something like near/nearest MV and/or MV prediction xd->pre.y_buffer = ref->y_buffer + mb_y_offset; xd->pre.y_stride = ref->y_stride; - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (ref->y_buffer + mb_y_offset, - ref->y_stride, xd->dst.y_buffer, - xd->dst.y_stride, INT_MAX); + err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, + xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the @@ -193,10 +188,8 @@ static int do_16x16_zerozero_search xd->pre.y_buffer = ref->y_buffer + mb_y_offset; xd->pre.y_stride = ref->y_stride; // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (ref->y_buffer + mb_y_offset, - ref->y_stride, xd->dst.y_buffer, - xd->dst.y_stride, INT_MAX); + err = vp8_sad16x16(ref->y_buffer + mb_y_offset, ref->y_stride, + xd->dst.y_buffer, xd->dst.y_stride, INT_MAX); dst_mv->as_int = 0; @@ -221,11 +214,8 @@ static int find_best_16x16_intra xd->mode_info_context->mbmi.mode = mode; vp8_build_intra_predictors_mby(xd); - // VARIANCE_INVOKE(&cpi->rtcd.variance, satd16x16) - err = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16) - (xd->predictor, 16, - buf->y_buffer + mb_y_offset, - buf->y_stride, best_err); + err = vp8_sad16x16(xd->predictor, 16, buf->y_buffer + mb_y_offset, + buf->y_stride, best_err); // find best if (err < best_err) { best_err = err; diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index a6cf2f1..85f5f28 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -1479,7 +1479,8 @@ int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, for (t = 0; t < 4; t++) block_offset[t] = ss[i + t].offset + best_address; - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); + fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, + sad_array); for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index cf9989d..00e2a54 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -77,7 +77,7 @@ extern void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFF extern void vp8_yv12_copy_src_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #endif -int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); +int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); extern void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance); @@ -101,25 +101,14 @@ extern const int vp8_gf_interval_table[101]; #if CONFIG_INTERNAL_STATS #include "math.h" -extern double vp8_calc_ssim -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - int lumamask, - double *weight, - const vp8_variance_rtcd_vtable_t *rtcd -); +extern double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest, int lumamask, + double *weight); -extern double vp8_calc_ssimg -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v, - const vp8_variance_rtcd_vtable_t *rtcd -); +extern double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest, double *ssim_y, + double *ssim_u, double *ssim_v); #endif @@ -2026,74 +2015,48 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf) { init_mv_ref_counts(); #endif +#define BFP(BT, SDF, VF, SVF, SVFHH, SVFHV, SVFHHV, SDX3F, SDX8F, SDX4DF) \ + cpi->fn_ptr[BT].sdf = SDF; \ + cpi->fn_ptr[BT].vf = VF; \ + cpi->fn_ptr[BT].svf = SVF; \ + cpi->fn_ptr[BT].svf_halfpix_h = SVFHH; \ + cpi->fn_ptr[BT].svf_halfpix_v = SVFHV; \ + cpi->fn_ptr[BT].svf_halfpix_hv = SVFHHV; \ + cpi->fn_ptr[BT].sdx3f = SDX3F; \ + cpi->fn_ptr[BT].sdx8f = SDX8F; \ + cpi->fn_ptr[BT].sdx4df = SDX4DF; + + #if CONFIG_SUPERBLOCKS - cpi->fn_ptr[BLOCK_32X32].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32); - cpi->fn_ptr[BLOCK_32X32].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32); - cpi->fn_ptr[BLOCK_32X32].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar32x32); - cpi->fn_ptr[BLOCK_32X32].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_h); - cpi->fn_ptr[BLOCK_32X32].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_v); - cpi->fn_ptr[BLOCK_32X32].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar32x32_hv); - cpi->fn_ptr[BLOCK_32X32].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x3); - cpi->fn_ptr[BLOCK_32X32].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x8); - cpi->fn_ptr[BLOCK_32X32].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad32x32x4d); + BFP(BLOCK_32X32, vp8_sad32x32, vp8_variance32x32, vp8_sub_pixel_variance32x32, + vp8_variance_halfpixvar32x32_h, vp8_variance_halfpixvar32x32_v, + vp8_variance_halfpixvar32x32_hv, vp8_sad32x32x3, vp8_sad32x32x8, + vp8_sad32x32x4d) #endif - cpi->fn_ptr[BLOCK_16X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16); - cpi->fn_ptr[BLOCK_16X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16); - cpi->fn_ptr[BLOCK_16X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16); - cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h); - cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v); - cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv); - cpi->fn_ptr[BLOCK_16X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3); - cpi->fn_ptr[BLOCK_16X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8); - cpi->fn_ptr[BLOCK_16X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d); - - cpi->fn_ptr[BLOCK_16X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8); - cpi->fn_ptr[BLOCK_16X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8); - cpi->fn_ptr[BLOCK_16X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8); - cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_16X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3); - cpi->fn_ptr[BLOCK_16X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8); - cpi->fn_ptr[BLOCK_16X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d); - - cpi->fn_ptr[BLOCK_8X16].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16); - cpi->fn_ptr[BLOCK_8X16].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16); - cpi->fn_ptr[BLOCK_8X16].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16); - cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_8X16].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3); - cpi->fn_ptr[BLOCK_8X16].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8); - cpi->fn_ptr[BLOCK_8X16].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d); - - cpi->fn_ptr[BLOCK_8X8].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8); - cpi->fn_ptr[BLOCK_8X8].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8); - cpi->fn_ptr[BLOCK_8X8].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8); - cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_8X8].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3); - cpi->fn_ptr[BLOCK_8X8].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8); - cpi->fn_ptr[BLOCK_8X8].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d); - - cpi->fn_ptr[BLOCK_4X4].sdf = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4); - cpi->fn_ptr[BLOCK_4X4].vf = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4); - cpi->fn_ptr[BLOCK_4X4].svf = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4); - cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h = NULL; - cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v = NULL; - cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL; - cpi->fn_ptr[BLOCK_4X4].sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3); - cpi->fn_ptr[BLOCK_4X4].sdx8f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8); - cpi->fn_ptr[BLOCK_4X4].sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d); + BFP(BLOCK_16X16, vp8_sad16x16, vp8_variance16x16, vp8_sub_pixel_variance16x16, + vp8_variance_halfpixvar16x16_h, vp8_variance_halfpixvar16x16_v, + vp8_variance_halfpixvar16x16_hv, vp8_sad16x16x3, vp8_sad16x16x8, + vp8_sad16x16x4d) + + BFP(BLOCK_16X8, vp8_sad16x8, vp8_variance16x8, vp8_sub_pixel_variance16x8, + NULL, NULL, NULL, vp8_sad16x8x3, vp8_sad16x8x8, vp8_sad16x8x4d) + + BFP(BLOCK_8X16, vp8_sad8x16, vp8_variance8x16, vp8_sub_pixel_variance8x16, + NULL, NULL, NULL, vp8_sad8x16x3, vp8_sad8x16x8, vp8_sad8x16x4d) + + BFP(BLOCK_8X8, vp8_sad8x8, vp8_variance8x8, vp8_sub_pixel_variance8x8, + NULL, NULL, NULL, vp8_sad8x8x3, vp8_sad8x8x8, vp8_sad8x8x4d) + + BFP(BLOCK_4X4, vp8_sad4x4, vp8_variance4x4, vp8_sub_pixel_variance4x4, + NULL, NULL, NULL, vp8_sad4x4x3, vp8_sad4x4x8, vp8_sad4x4x4d) #if ARCH_X86 || ARCH_X86_64 - cpi->fn_ptr[BLOCK_16X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_16X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_8X16].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_8X8].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); - cpi->fn_ptr[BLOCK_4X4].copymem = VARIANCE_INVOKE(&cpi->rtcd.variance, copy32xn); + cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_8X8].copymem = vp8_copy32xn; + cpi->fn_ptr[BLOCK_4X4].copymem = vp8_copy32xn; #endif cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search); @@ -2369,8 +2332,7 @@ void vp8_remove_compressor(VP8_PTR *ptr) { static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, unsigned char *recon, int recon_stride, - unsigned int cols, unsigned int rows, - vp8_variance_rtcd_vtable_t *rtcd) { + unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; int diff; @@ -2379,9 +2341,7 @@ static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, for (col = 0; col + 16 <= cols; col += 16) { unsigned int sse; - VARIANCE_INVOKE(rtcd, mse16x16)(orig + col, orig_stride, - recon + col, recon_stride, - &sse); + vp8_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); total_sse += sse; } @@ -2433,8 +2393,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) { pkt.kind = VPX_CODEC_PSNR_PKT; sse = calc_plane_error(orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, - width, height, - IF_RTCD(&cpi->rtcd.variance)); + width, height); pkt.data.psnr.sse[0] = sse; pkt.data.psnr.sse[1] = sse; pkt.data.psnr.samples[0] = width * height; @@ -2445,8 +2404,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) { sse = calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, - width, height, - IF_RTCD(&cpi->rtcd.variance)); + width, height); pkt.data.psnr.sse[0] += sse; pkt.data.psnr.sse[2] = sse; pkt.data.psnr.samples[0] += width * height; @@ -2454,8 +2412,7 @@ static void generate_psnr_packet(VP8_COMP *cpi) { sse = calc_plane_error(orig->v_buffer, orig->uv_stride, recon->v_buffer, recon->uv_stride, - width, height, - IF_RTCD(&cpi->rtcd.variance)); + width, height); pkt.data.psnr.sse[0] += sse; pkt.data.psnr.sse[3] = sse; pkt.data.psnr.samples[0] += width * height; @@ -3427,8 +3384,7 @@ static void encode_frame_to_data_rate if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { int last_q = Q; int kf_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); int high_err_target = cpi->ambient_err; int low_err_target = (cpi->ambient_err >> 1); @@ -3620,8 +3576,7 @@ static void encode_frame_to_data_rate if (Loop == FALSE && cm->frame_type != KEY_FRAME && sf->search_best_filter) { if (mcomp_filter_index < mcomp_filters) { INT64 err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); INT64 rate = cpi->projected_frame_size << 8; mcomp_filter_cost[mcomp_filter_index] = (RDCOST(cpi->RDMULT, cpi->RDDIV, rate, err)); @@ -3683,8 +3638,7 @@ static void encode_frame_to_data_rate // the force key frame if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) { cpi->ambient_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); } // This frame's MVs are saved and will be used in next frame's MV @@ -3902,8 +3856,7 @@ static void encode_frame_to_data_rate vp8_clear_system_state(); // __asm emms; recon_err = vp8_calc_ss_err(cpi->Source, - &cm->yv12_fb[cm->new_fb_idx], - IF_RTCD(&cpi->rtcd.variance)); + &cm->yv12_fb[cm->new_fb_idx]); if (cpi->twopass.total_left_stats->coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d" @@ -4389,16 +4342,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon int64_t sq_error; ye = calc_plane_error(orig->y_buffer, orig->y_stride, - recon->y_buffer, recon->y_stride, orig->y_width, orig->y_height, - IF_RTCD(&cpi->rtcd.variance)); + recon->y_buffer, recon->y_stride, orig->y_width, + orig->y_height); ue = calc_plane_error(orig->u_buffer, orig->uv_stride, - recon->u_buffer, recon->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + recon->u_buffer, recon->uv_stride, orig->uv_width, + orig->uv_height); ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - recon->v_buffer, recon->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + recon->v_buffer, recon->uv_stride, orig->uv_width, + orig->uv_height); sq_error = ye + ue + ve; @@ -4418,16 +4371,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon vp8_clear_system_state(); ye = calc_plane_error(orig->y_buffer, orig->y_stride, - pp->y_buffer, pp->y_stride, orig->y_width, orig->y_height, - IF_RTCD(&cpi->rtcd.variance)); + pp->y_buffer, pp->y_stride, orig->y_width, + orig->y_height); ue = calc_plane_error(orig->u_buffer, orig->uv_stride, - pp->u_buffer, pp->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + pp->u_buffer, pp->uv_stride, orig->uv_width, + orig->uv_height); ve = calc_plane_error(orig->v_buffer, orig->uv_stride, - pp->v_buffer, pp->uv_stride, orig->uv_width, orig->uv_height, - IF_RTCD(&cpi->rtcd.variance)); + pp->v_buffer, pp->uv_stride, orig->uv_width, + orig->uv_height); sq_error = ye + ue + ve; @@ -4440,8 +4393,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon cpi->totalp += frame_psnr2; frame_ssim2 = vp8_calc_ssim(cpi->Source, - &cm->post_proc_buffer, 1, &weight, - IF_RTCD(&cpi->rtcd.variance)); + &cm->post_proc_buffer, 1, &weight); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; @@ -4460,7 +4412,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon if (cpi->b_calculate_ssimg) { double y, u, v, frame_all; frame_all = vp8_calc_ssimg(cpi->Source, cm->frame_to_show, - &y, &u, &v, IF_RTCD(&cpi->rtcd.variance)); + &y, &u, &v); cpi->total_ssimg_y += y; cpi->total_ssimg_u += u; cpi->total_ssimg_v += v; @@ -4603,19 +4555,19 @@ int vp8_set_internal_size(VP8_PTR comp, VPX_SCALING horiz_mode, VPX_SCALING vert -int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd) { +int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { int i, j; int Total = 0; unsigned char *src = source->y_buffer; unsigned char *dst = dest->y_buffer; - (void)rtcd; // Loop through the Y plane raw and reconstruction data summing (square differences) for (i = 0; i < source->y_height; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse); + Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, + &sse); } src += 16 * source->y_stride; diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 1e74940..16d15e6 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -365,7 +365,6 @@ typedef struct { typedef struct VP8_ENCODER_RTCD { VP8_COMMON_RTCD *common; - vp8_variance_rtcd_vtable_t variance; vp8_fdct_rtcd_vtable_t fdct; vp8_encodemb_rtcd_vtable_t encodemb; vp8_search_rtcd_vtable_t search; diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c index 9549978..57bd414 100644 --- a/vp8/encoder/picklpf.c +++ b/vp8/encoder/picklpf.c @@ -21,7 +21,8 @@ #include "vpx_ports/arm.h" #endif -extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd); +extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest); #if HAVE_ARMV7 extern void vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #endif @@ -71,7 +72,8 @@ vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst vpx_memcpy(dst_y, src_y, ystride * (linestocopy + 16)); } -static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, int Fraction, const vp8_variance_rtcd_vtable_t *rtcd) { +static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, + YV12_BUFFER_CONFIG *dest, int Fraction) { int i, j; int Total = 0; int srcoffset, dstoffset; @@ -79,7 +81,6 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF unsigned char *dst = dest->y_buffer; int linestocopy = (source->y_height >> (Fraction + 4)); - (void)rtcd; if (linestocopy < 1) linestocopy = 1; @@ -97,7 +98,8 @@ static int vp8_calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONF for (i = 0; i < linestocopy; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; - Total += VARIANCE_INVOKE(rtcd, mse16x16)(src + j, source->y_stride, dst + j, dest->y_stride, &sse); + Total += vp8_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, + &sse); } src += 16 * source->y_stride; @@ -179,7 +181,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { // Get the err using the previous frame's filter value. vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); - best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); + best_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3); // Re-instate the unfiltered frame vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); @@ -192,7 +194,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); // Get the err for filtered frame - filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3); // Re-instate the unfiltered frame vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); @@ -221,7 +223,7 @@ void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); // Get the err for filtered frame - filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_partial_ssl_err(sd, cm->frame_to_show, 3); // Re-instate the unfiltered frame vp8_yv12_copy_partial_frame_ptr(&cpi->last_frame_uf, cm->frame_to_show, 3); @@ -308,7 +310,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme vp8cx_set_alt_lf_level(cpi, filt_mid); vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_mid, segment); - best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + best_err = vp8_calc_ss_err(sd, cm->frame_to_show); filt_best = filt_mid; // Re-instate the unfiltered frame @@ -348,7 +350,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_low, segment); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 @@ -383,7 +385,7 @@ void vp8cx_pick_filter_level_sg(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi, int segme vp8cx_set_alt_lf_level(cpi, filt_high); vp8_loop_filter_frame_segment(cm, &cpi->mb.e_mbd, filt_high, segment); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 @@ -517,7 +519,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8cx_set_alt_lf_level(cpi, filt_mid); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid); - best_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + best_err = vp8_calc_ss_err(sd, cm->frame_to_show); filt_best = filt_mid; // Re-instate the unfiltered frame @@ -557,7 +559,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 @@ -592,7 +594,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { vp8cx_set_alt_lf_level(cpi, filt_high); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high); - filt_err = vp8_calc_ss_err(sd, cm->frame_to_show, IF_RTCD(&cpi->rtcd.variance)); + filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); // Re-instate the unfiltered frame #if HAVE_ARMV7 diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 22da82a..585581c 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -520,7 +520,7 @@ int vp8_mbuverror_c(MACROBLOCK *mb) { return error; } -int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) { +int vp8_uvsse(MACROBLOCK *x) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); @@ -551,16 +551,14 @@ int VP8_UVSSE(MACROBLOCK *x, const vp8_variance_rtcd_vtable_t *rtcd) { vptr = x->e_mbd.pre.v_buffer + offset; if ((mv_row | mv_col) & 7) { - VARIANCE_INVOKE(rtcd, subpixvar8x8)(uptr, pre_stride, - (mv_col & 7) << 1, (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); - VARIANCE_INVOKE(rtcd, subpixvar8x8)(vptr, pre_stride, - (mv_col & 7) << 1, (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); + vp8_sub_pixel_variance8x8(uptr, pre_stride, (mv_col & 7) << 1, + (mv_row & 7) << 1, upred_ptr, uv_stride, &sse2); + vp8_sub_pixel_variance8x8(vptr, pre_stride, (mv_col & 7) << 1, + (mv_row & 7) << 1, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } else { - VARIANCE_INVOKE(rtcd, var8x8)(uptr, pre_stride, - upred_ptr, uv_stride, &sse2); - VARIANCE_INVOKE(rtcd, var8x8)(vptr, pre_stride, - vpred_ptr, uv_stride, &sse1); + vp8_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); + vp8_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } return sse2; @@ -3922,8 +3920,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if (threshold < x->encode_breakout) threshold = x->encode_breakout; - var = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) - (*(b->base_src), b->src_stride, + var = vp8_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); if (sse < threshold) { @@ -3933,7 +3930,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int if ((sse - var < q2dc *q2dc >> 4) || (sse / 2 > var && sse - var < 64)) { // Check u and v to make sure skip is ok - int sse2 = VP8_UVSSE(x, IF_RTCD(&cpi->rtcd.variance)); + int sse2 = vp8_uvsse(x); if (sse2 * 2 < threshold) { x->skip = 1; distortion2 = sse + sse2; @@ -4840,8 +4837,8 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, if (threshold < x->encode_breakout) threshold = x->encode_breakout; - var = VARIANCE_INVOKE(&cpi->rtcd.variance, var32x32)(*(b->base_src), - b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); + var = vp8_variance32x32(*(b->base_src), b->src_stride, + xd->dst.y_buffer, xd->dst.y_stride, &sse); if (sse < threshold) { unsigned int q2dc = xd->block[24].dequant[0]; @@ -4851,11 +4848,9 @@ int64_t vp8_rd_pick_inter_mode_sb(VP8_COMP *cpi, MACROBLOCK *x, (sse / 2 > var && sse - var < 64)) { // Check u and v to make sure skip is ok unsigned int sse2, sse3; - var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) - (x->src.u_buffer, x->src.uv_stride, + var += vp8_variance16x16(x->src.u_buffer, x->src.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, &sse2); - var += VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16) - (x->src.v_buffer, x->src.uv_stride, + var += vp8_variance16x16(x->src.v_buffer, x->src.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, &sse3); sse2 += sse3; if (sse2 * 2 < threshold) { diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c index d3d9711..865496a 100644 --- a/vp8/encoder/ssim.c +++ b/vp8/encoder/ssim.c @@ -11,18 +11,10 @@ #include "onyx_int.h" -void vp8_ssim_parms_16x16_c -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -) { +void vp8_ssim_parms_16x16_c(unsigned char *s, int sp, unsigned char *r, + int rp, unsigned long *sum_s, unsigned long *sum_r, + unsigned long *sum_sq_s, unsigned long *sum_sq_r, + unsigned long *sum_sxr) { int i, j; for (i = 0; i < 16; i++, s += sp, r += rp) { for (j = 0; j < 16; j++) { @@ -34,18 +26,10 @@ void vp8_ssim_parms_16x16_c } } } -void vp8_ssim_parms_8x8_c -( - unsigned char *s, - int sp, - unsigned char *r, - int rp, - unsigned long *sum_s, - unsigned long *sum_r, - unsigned long *sum_sq_s, - unsigned long *sum_sq_r, - unsigned long *sum_sxr -) { +void vp8_ssim_parms_8x8_c(unsigned char *s, int sp, unsigned char *r, int rp, + unsigned long *sum_s, unsigned long *sum_r, + unsigned long *sum_sq_s, unsigned long *sum_sq_r, + unsigned long *sum_sxr) { int i, j; for (i = 0; i < 8; i++, s += sp, r += rp) { for (j = 0; j < 8; j++) { @@ -61,15 +45,9 @@ void vp8_ssim_parms_8x8_c const static int64_t cc1 = 26634; // (64^2*(.01*255)^2 const static int64_t cc2 = 239708; // (64^2*(.03*255)^2 -static double similarity -( - unsigned long sum_s, - unsigned long sum_r, - unsigned long sum_sq_s, - unsigned long sum_sq_r, - unsigned long sum_sxr, - int count -) { +static double similarity(unsigned long sum_s, unsigned long sum_r, + unsigned long sum_sq_s, unsigned long sum_sq_r, + unsigned long sum_sxr, int count) { int64_t ssim_n, ssim_d; int64_t c1, c2; @@ -87,23 +65,22 @@ static double similarity return ssim_n * 1.0 / ssim_d; } -static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp, - const vp8_variance_rtcd_vtable_t *rtcd) { +static double ssim_16x16(unsigned char *s, int sp, unsigned char *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 256); } -static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp, - const vp8_variance_rtcd_vtable_t *rtcd) { +static double ssim_8x8(unsigned char *s, int sp, unsigned char *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; - SSIMPF_INVOKE(rtcd, 8x8)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + vp8_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64); } // TODO: (jbb) tried to scale this function such that we may be able to use it // for distortion metric in mode selection code ( provided we do a reconstruction) -long dssim(unsigned char *s, int sp, unsigned char *r, int rp, - const vp8_variance_rtcd_vtable_t *rtcd) { +long dssim(unsigned char *s, int sp, unsigned char *r, int rp) { unsigned long sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; int64_t ssim3; int64_t ssim_n1, ssim_n2; @@ -115,7 +92,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp, c1 = cc1 * 16; c2 = cc2 * 16; - SSIMPF_INVOKE(rtcd, 16x16)(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); + vp8_ssim_parms_16x16(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, + &sum_sxr); ssim_n1 = (2 * sum_s * sum_r + c1); ssim_n2 = ((int64_t) 2 * 256 * sum_sxr - (int64_t) 2 * sum_s * sum_r + c2); @@ -137,16 +115,8 @@ long dssim(unsigned char *s, int sp, unsigned char *r, int rp, // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. -double vp8_ssim2 -( - unsigned char *img1, - unsigned char *img2, - int stride_img1, - int stride_img2, - int width, - int height, - const vp8_variance_rtcd_vtable_t *rtcd -) { +double vp8_ssim2(unsigned char *img1, unsigned char *img2, int stride_img1, + int stride_img2, int width, int height) { int i, j; int samples = 0; double ssim_total = 0; @@ -154,7 +124,7 @@ double vp8_ssim2 // sample point start with each 4x4 location for (i = 0; i < height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { for (j = 0; j < width - 8; j += 4) { - double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2, rtcd); + double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2); ssim_total += v; samples++; } @@ -162,28 +132,22 @@ double vp8_ssim2 ssim_total /= samples; return ssim_total; } -double vp8_calc_ssim -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - int lumamask, - double *weight, - const vp8_variance_rtcd_vtable_t *rtcd -) { +double vp8_calc_ssim(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + int lumamask, double *weight) { double a, b, c; double ssimv; a = vp8_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, source->y_width, - source->y_height, rtcd); + source->y_height); b = vp8_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); c = vp8_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); ssimv = a * .8 + .1 * (b + c); @@ -192,29 +156,22 @@ double vp8_calc_ssim return ssimv; } -double vp8_calc_ssimg -( - YV12_BUFFER_CONFIG *source, - YV12_BUFFER_CONFIG *dest, - double *ssim_y, - double *ssim_u, - double *ssim_v, - const vp8_variance_rtcd_vtable_t *rtcd -) { +double vp8_calc_ssimg(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, + double *ssim_y, double *ssim_u, double *ssim_v) { double ssim_all = 0; double a, b, c; a = vp8_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, source->y_width, - source->y_height, rtcd); + source->y_height); b = vp8_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); c = vp8_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, source->uv_width, - source->uv_height, rtcd); + source->uv_height); *ssim_y = a; *ssim_u = b; *ssim_v = c; diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h index a2fadfc..cdeb390 100644 --- a/vp8/encoder/variance.h +++ b/vp8/encoder/variance.h @@ -12,507 +12,73 @@ #ifndef VARIANCE_H #define VARIANCE_H -#include "vpx_config.h" - -#define prototype_sad(sym)\ - unsigned int (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - int max_sad\ - ) - -#define prototype_sad_multi_same_address(sym)\ - void (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned int *sad_array\ - ) - -#define prototype_sad_multi_same_address_1(sym)\ - void (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned short *sad_array\ - ) - -#define prototype_sad_multi_dif_address(sym)\ - void (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - unsigned char *ref_ptr[4], \ - int ref_stride, \ - unsigned int *sad_array\ - ) - -#define prototype_variance(sym) \ - unsigned int (sym) \ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned int *sse\ - ) - -#define prototype_variance2(sym) \ - unsigned int (sym) \ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride, \ - unsigned int *sse,\ - int *sum\ - ) - -#define prototype_subpixvariance(sym) \ - unsigned int (sym) \ - ( \ - const unsigned char *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const unsigned char *ref_ptr, \ - int Refstride, \ - unsigned int *sse \ - ); - -#define prototype_ssimpf(sym) \ - void (sym) \ - ( \ - unsigned char *s, \ - int sp, \ - unsigned char *r, \ - int rp, \ - unsigned long *sum_s, \ - unsigned long *sum_r, \ - unsigned long *sum_sq_s, \ - unsigned long *sum_sq_r, \ - unsigned long *sum_sxr \ - ); - -#define prototype_getmbss(sym) unsigned int (sym)(const short *) - -#define prototype_get16x16prederror(sym)\ - unsigned int (sym)\ - (\ - const unsigned char *src_ptr, \ - int source_stride, \ - const unsigned char *ref_ptr, \ - int ref_stride \ - ) - -#if ARCH_X86 || ARCH_X86_64 -#include "x86/variance_x86.h" -#endif - -#if ARCH_ARM -#include "arm/variance_arm.h" -#endif - -#ifndef vp8_variance_sad4x4 -#define vp8_variance_sad4x4 vp8_sad4x4_c -#endif -extern prototype_sad(vp8_variance_sad4x4); - -#ifndef vp8_variance_sad8x8 -#define vp8_variance_sad8x8 vp8_sad8x8_c -#endif -extern prototype_sad(vp8_variance_sad8x8); - -#ifndef vp8_variance_sad8x16 -#define vp8_variance_sad8x16 vp8_sad8x16_c -#endif -extern prototype_sad(vp8_variance_sad8x16); - -#ifndef vp8_variance_sad16x8 -#define vp8_variance_sad16x8 vp8_sad16x8_c -#endif -extern prototype_sad(vp8_variance_sad16x8); - -#ifndef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_c -#endif -extern prototype_sad(vp8_variance_sad16x16); - -#ifndef vp8_variance_sad32x32 -#define vp8_variance_sad32x32 vp8_sad32x32_c -#endif -extern prototype_sad(vp8_variance_sad32x32); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_sad32x32x3 -#define vp8_variance_sad32x32x3 vp8_sad32x32x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad32x32x3); - -#ifndef vp8_variance_sad16x16x3 -#define vp8_variance_sad16x16x3 vp8_sad16x16x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad16x16x3); - -#ifndef vp8_variance_sad16x8x3 -#define vp8_variance_sad16x8x3 vp8_sad16x8x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad16x8x3); - -#ifndef vp8_variance_sad8x8x3 -#define vp8_variance_sad8x8x3 vp8_sad8x8x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad8x8x3); - -#ifndef vp8_variance_sad8x16x3 -#define vp8_variance_sad8x16x3 vp8_sad8x16x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3); - -#ifndef vp8_variance_sad4x4x3 -#define vp8_variance_sad4x4x3 vp8_sad4x4x3_c -#endif -extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3); - -#ifndef vp8_variance_sad32x32x8 -#define vp8_variance_sad32x32x8 vp8_sad32x32x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad32x32x8); - -#ifndef vp8_variance_sad16x16x8 -#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8); - -#ifndef vp8_variance_sad16x8x8 -#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8); - -#ifndef vp8_variance_sad8x8x8 -#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8); - -#ifndef vp8_variance_sad8x16x8 -#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8); - -#ifndef vp8_variance_sad4x4x8 -#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c -#endif -extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_sad32x32x4d -#define vp8_variance_sad32x32x4d vp8_sad32x32x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad32x32x4d); - -#ifndef vp8_variance_sad16x16x4d -#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad16x16x4d); - -#ifndef vp8_variance_sad16x8x4d -#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad16x8x4d); - -#ifndef vp8_variance_sad8x8x4d -#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad8x8x4d); - -#ifndef vp8_variance_sad8x16x4d -#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad8x16x4d); - -#ifndef vp8_variance_sad4x4x4d -#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_c -#endif -extern prototype_sad_multi_dif_address(vp8_variance_sad4x4x4d); - -#if ARCH_X86 || ARCH_X86_64 -#ifndef vp8_variance_copy32xn -#define vp8_variance_copy32xn vp8_copy32xn_c -#endif -extern prototype_sad(vp8_variance_copy32xn); -#endif - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_var4x4 -#define vp8_variance_var4x4 vp8_variance4x4_c -#endif -extern prototype_variance(vp8_variance_var4x4); - -#ifndef vp8_variance_var8x8 -#define vp8_variance_var8x8 vp8_variance8x8_c -#endif -extern prototype_variance(vp8_variance_var8x8); - -#ifndef vp8_variance_var8x16 -#define vp8_variance_var8x16 vp8_variance8x16_c -#endif -extern prototype_variance(vp8_variance_var8x16); - -#ifndef vp8_variance_var16x8 -#define vp8_variance_var16x8 vp8_variance16x8_c -#endif -extern prototype_variance(vp8_variance_var16x8); - -#ifndef vp8_variance_var16x16 -#define vp8_variance_var16x16 vp8_variance16x16_c -#endif -extern prototype_variance(vp8_variance_var16x16); - -#ifndef vp8_variance_var32x32 -#define vp8_variance_var32x32 vp8_variance32x32_c -#endif -extern prototype_variance(vp8_variance_var32x32); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_subpixvar4x4 -#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar4x4); - -#ifndef vp8_variance_subpixvar8x8 -#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar8x8); - -#ifndef vp8_variance_subpixvar8x16 -#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar8x16); - -#ifndef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar16x8); - -#ifndef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar16x16); - -#ifndef vp8_variance_subpixvar32x32 -#define vp8_variance_subpixvar32x32 vp8_sub_pixel_variance32x32_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixvar32x32); - -#ifndef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c -#endif -extern prototype_variance(vp8_variance_halfpixvar16x16_h); - -#ifndef vp8_variance_halfpixvar32x32_h -#define vp8_variance_halfpixvar32x32_h vp8_variance_halfpixvar32x32_h_c -#endif -extern prototype_variance(vp8_variance_halfpixvar32x32_h); - -#ifndef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c -#endif -extern prototype_variance(vp8_variance_halfpixvar16x16_v); - -#ifndef vp8_variance_halfpixvar32x32_v -#define vp8_variance_halfpixvar32x32_v vp8_variance_halfpixvar32x32_v_c -#endif -extern prototype_variance(vp8_variance_halfpixvar32x32_v); - -#ifndef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c -#endif -extern prototype_variance(vp8_variance_halfpixvar16x16_hv); - -#ifndef vp8_variance_halfpixvar32x32_hv -#define vp8_variance_halfpixvar32x32_hv vp8_variance_halfpixvar32x32_hv_c -#endif -extern prototype_variance(vp8_variance_halfpixvar32x32_hv); - -#ifndef vp8_variance_subpixmse16x16 -#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixmse16x16); - -#ifndef vp8_variance_subpixmse32x32 -#define vp8_variance_subpixmse32x32 vp8_sub_pixel_mse32x32_c -#endif -extern prototype_subpixvariance(vp8_variance_subpixmse32x32); - -// -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- - -#ifndef vp8_variance_getmbss -#define vp8_variance_getmbss vp8_get_mb_ss_c -#endif -extern prototype_getmbss(vp8_variance_getmbss); - -#ifndef vp8_variance_mse16x16 -#define vp8_variance_mse16x16 vp8_mse16x16_c -#endif -extern prototype_variance(vp8_variance_mse16x16); - -#ifndef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_c -#endif -extern prototype_ssimpf(vp8_ssimpf_8x8) - -#ifndef vp8_ssimpf_16x16 -#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_c -#endif -extern prototype_ssimpf(vp8_ssimpf_16x16) - -#ifndef vp8_variance_satd16x16 -#define vp8_variance_satd16x16 vp8_satd16x16_c -#endif -extern prototype_variance(vp8_variance_satd16x16); - -typedef prototype_sad(*vp8_sad_fn_t); -typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t); -typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t); -typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t); -typedef prototype_variance(*vp8_variance_fn_t); -typedef prototype_variance2(*vp8_variance2_fn_t); -typedef prototype_subpixvariance(*vp8_subpixvariance_fn_t); -typedef prototype_getmbss(*vp8_getmbss_fn_t); -typedef prototype_ssimpf(*vp8_ssimpf_fn_t); -typedef prototype_get16x16prederror(*vp8_get16x16prederror_fn_t); - -typedef struct { - vp8_sad_fn_t sad4x4; - vp8_sad_fn_t sad8x8; - vp8_sad_fn_t sad8x16; - vp8_sad_fn_t sad16x8; - vp8_sad_fn_t sad16x16; -#if CONFIG_SUPERBLOCKS - vp8_sad_fn_t sad32x32; -#endif - - vp8_variance_fn_t var4x4; - vp8_variance_fn_t var8x8; - vp8_variance_fn_t var8x16; - vp8_variance_fn_t var16x8; - vp8_variance_fn_t var16x16; -#if CONFIG_SUPERBLOCKS - vp8_variance_fn_t var32x32; -#endif - - vp8_subpixvariance_fn_t subpixvar4x4; - vp8_subpixvariance_fn_t subpixvar8x8; - vp8_subpixvariance_fn_t subpixvar8x16; - vp8_subpixvariance_fn_t subpixvar16x8; - vp8_subpixvariance_fn_t subpixvar16x16; -#if CONFIG_SUPERBLOCKS - vp8_subpixvariance_fn_t subpixvar32x32; -#endif - vp8_variance_fn_t halfpixvar16x16_h; - vp8_variance_fn_t halfpixvar32x32_h; - vp8_variance_fn_t halfpixvar16x16_v; -#if CONFIG_SUPERBLOCKS - vp8_variance_fn_t halfpixvar32x32_v; -#endif - vp8_variance_fn_t halfpixvar16x16_hv; -#if CONFIG_SUPERBLOCKS - vp8_variance_fn_t halfpixvar32x32_hv; -#endif - vp8_subpixvariance_fn_t subpixmse16x16; -#if CONFIG_SUPERBLOCKS - vp8_subpixvariance_fn_t subpixmse32x32; -#endif - - vp8_getmbss_fn_t getmbss; - vp8_variance_fn_t mse16x16; - -#if CONFIG_SUPERBLOCKS - vp8_sad_multi_fn_t sad32x32x3; -#endif - vp8_sad_multi_fn_t sad16x16x3; - vp8_sad_multi_fn_t sad16x8x3; - vp8_sad_multi_fn_t sad8x16x3; - vp8_sad_multi_fn_t sad8x8x3; - vp8_sad_multi_fn_t sad4x4x3; - -#if CONFIG_SUPERBLOCKS - vp8_sad_multi1_fn_t sad32x32x8; -#endif - vp8_sad_multi1_fn_t sad16x16x8; - vp8_sad_multi1_fn_t sad16x8x8; - vp8_sad_multi1_fn_t sad8x16x8; - vp8_sad_multi1_fn_t sad8x8x8; - vp8_sad_multi1_fn_t sad4x4x8; - -#if CONFIG_SUPERBLOCKS - vp8_sad_multi_d_fn_t sad32x32x4d; -#endif - vp8_sad_multi_d_fn_t sad16x16x4d; - vp8_sad_multi_d_fn_t sad16x8x4d; - vp8_sad_multi_d_fn_t sad8x16x4d; - vp8_sad_multi_d_fn_t sad8x8x4d; - vp8_sad_multi_d_fn_t sad4x4x4d; - -#if ARCH_X86 || ARCH_X86_64 - vp8_sad_fn_t copy32xn; -#endif - -#if CONFIG_INTERNAL_STATS - vp8_ssimpf_fn_t ssimpf_8x8; - vp8_ssimpf_fn_t ssimpf_16x16; -#endif - - vp8_variance_fn_t satd16x16; -} vp8_variance_rtcd_vtable_t; - -typedef struct { - vp8_sad_fn_t sdf; - vp8_variance_fn_t vf; - vp8_subpixvariance_fn_t svf; - vp8_variance_fn_t svf_halfpix_h; - vp8_variance_fn_t svf_halfpix_v; - vp8_variance_fn_t svf_halfpix_hv; - vp8_sad_multi_fn_t sdx3f; - vp8_sad_multi1_fn_t sdx8f; - vp8_sad_multi_d_fn_t sdx4df; -#if ARCH_X86 || ARCH_X86_64 - vp8_sad_fn_t copymem; -#endif +typedef unsigned int(*vp8_sad_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned int max_sad); + +typedef void (*vp8_copy32xn_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + int n); + +typedef void (*vp8_sad_multi_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned int *sad_array); + +typedef void (*vp8_sad_multi1_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned short *sad_array); + +typedef void (*vp8_sad_multi_d_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char * const ref_ptr[], + int ref_stride, unsigned int *sad_array); + +typedef unsigned int (*vp8_variance_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride, + unsigned int *sse); + +typedef unsigned int (*vp8_subpixvariance_fn_t)(const unsigned char *src_ptr, + int source_stride, + int xoffset, + int yoffset, + const unsigned char *ref_ptr, + int Refstride, + unsigned int *sse); + +typedef void (*vp8_ssimpf_fn_t)(unsigned char *s, int sp, unsigned char *r, + int rp, unsigned long *sum_s, + unsigned long *sum_r, unsigned long *sum_sq_s, + unsigned long *sum_sq_r, + unsigned long *sum_sxr); + +typedef unsigned int (*vp8_getmbss_fn_t)(const short *); + +typedef unsigned int (*vp8_get16x16prederror_fn_t)(const unsigned char *src_ptr, + int source_stride, + const unsigned char *ref_ptr, + int ref_stride); + +typedef struct variance_vtable { + vp8_sad_fn_t sdf; + vp8_variance_fn_t vf; + vp8_subpixvariance_fn_t svf; + vp8_variance_fn_t svf_halfpix_h; + vp8_variance_fn_t svf_halfpix_v; + vp8_variance_fn_t svf_halfpix_hv; + vp8_sad_multi_fn_t sdx3f; + vp8_sad_multi1_fn_t sdx8f; + vp8_sad_multi_d_fn_t sdx4df; + vp8_copy32xn_fn_t copymem; } vp8_variance_fn_ptr_t; -#if CONFIG_RUNTIME_CPU_DETECT -#define VARIANCE_INVOKE(ctx,fn) (ctx)->fn -#define SSIMPF_INVOKE(ctx,fn) (ctx)->ssimpf_##fn -#else -#define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn -#define SSIMPF_INVOKE(ctx,fn) vp8_ssimpf_##fn -#endif - -#if CONFIG_NEWBESTREFMV -unsigned int vp8_sad2x16_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad); -unsigned int vp8_sad16x2_c( - const unsigned char *src_ptr, - int src_stride, - const unsigned char *ref_ptr, - int ref_stride, - int max_sad); -#endif - #endif diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h deleted file mode 100644 index 0971f11..0000000 --- a/vp8/encoder/x86/variance_x86.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VARIANCE_X86_H -#define VARIANCE_X86_H - - -/* Note: - * - * This platform is commonly built for runtime CPU detection. If you modify - * any of the function mappings present in this file, be sure to also update - * them in the function pointer initialization code - */ -#if HAVE_MMX -extern prototype_sad(vp8_sad4x4_mmx); -extern prototype_sad(vp8_sad8x8_mmx); -extern prototype_sad(vp8_sad8x16_mmx); -extern prototype_sad(vp8_sad16x8_mmx); -extern prototype_sad(vp8_sad16x16_mmx); -extern prototype_variance(vp8_variance4x4_mmx); -extern prototype_variance(vp8_variance8x8_mmx); -extern prototype_variance(vp8_variance8x16_mmx); -extern prototype_variance(vp8_variance16x8_mmx); -extern prototype_variance(vp8_variance16x16_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx); -extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx); -extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx); -extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx); -extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx); -extern prototype_getmbss(vp8_get_mb_ss_mmx); -extern prototype_variance(vp8_mse16x16_mmx); -extern prototype_variance2(vp8_get8x8var_mmx); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad4x4 -#define vp8_variance_sad4x4 vp8_sad4x4_mmx - -#undef vp8_variance_sad8x8 -#define vp8_variance_sad8x8 vp8_sad8x8_mmx - -#undef vp8_variance_sad8x16 -#define vp8_variance_sad8x16 vp8_sad8x16_mmx - -#undef vp8_variance_sad16x8 -#define vp8_variance_sad16x8 vp8_sad16x8_mmx - -#undef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_mmx - -#undef vp8_variance_var4x4 -#define vp8_variance_var4x4 vp8_variance4x4_mmx - -#undef vp8_variance_var8x8 -#define vp8_variance_var8x8 vp8_variance8x8_mmx - -#undef vp8_variance_var8x16 -#define vp8_variance_var8x16 vp8_variance8x16_mmx - -#undef vp8_variance_var16x8 -#define vp8_variance_var16x8 vp8_variance16x8_mmx - -#undef vp8_variance_var16x16 -#define vp8_variance_var16x16 vp8_variance16x16_mmx - -#undef vp8_variance_subpixvar4x4 -#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_mmx - -#undef vp8_variance_subpixvar8x8 -#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_mmx - -#undef vp8_variance_subpixvar8x16 -#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_mmx - -#undef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_mmx - -#undef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx - -#undef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx - -#undef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx - -#undef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx - -#undef vp8_variance_subpixmse16x16 -#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx - -#undef vp8_variance_getmbss -#define vp8_variance_getmbss vp8_get_mb_ss_mmx - -#undef vp8_variance_mse16x16 -#define vp8_variance_mse16x16 vp8_mse16x16_mmx - -#endif -#endif - - -#if HAVE_SSE2 -extern prototype_sad(vp8_sad4x4_wmt); -extern prototype_sad(vp8_sad8x8_wmt); -extern prototype_sad(vp8_sad8x16_wmt); -extern prototype_sad(vp8_sad16x8_wmt); -extern prototype_sad(vp8_sad16x16_wmt); -extern prototype_sad(vp8_copy32xn_sse2); -extern prototype_variance(vp8_variance4x4_wmt); -extern prototype_variance(vp8_variance8x8_wmt); -extern prototype_variance(vp8_variance8x16_wmt); -extern prototype_variance(vp8_variance16x8_wmt); -extern prototype_variance(vp8_variance16x16_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance4x4_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt); -extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt); -extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt); -extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt); -extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt); -extern prototype_getmbss(vp8_get_mb_ss_sse2); -extern prototype_variance(vp8_mse16x16_wmt); -extern prototype_variance2(vp8_get8x8var_sse2); -extern prototype_variance2(vp8_get16x16var_sse2); -extern prototype_ssimpf(vp8_ssim_parms_8x8_sse2) -extern prototype_ssimpf(vp8_ssim_parms_16x16_sse2) - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad4x4 -#define vp8_variance_sad4x4 vp8_sad4x4_wmt - -#undef vp8_variance_sad8x8 -#define vp8_variance_sad8x8 vp8_sad8x8_wmt - -#undef vp8_variance_sad8x16 -#define vp8_variance_sad8x16 vp8_sad8x16_wmt - -#undef vp8_variance_sad16x8 -#define vp8_variance_sad16x8 vp8_sad16x8_wmt - -#undef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_wmt - -#undef vp8_variance_copy32xn -#define vp8_variance_copy32xn vp8_copy32xn_sse2 - -#undef vp8_variance_var4x4 -#define vp8_variance_var4x4 vp8_variance4x4_wmt - -#undef vp8_variance_var8x8 -#define vp8_variance_var8x8 vp8_variance8x8_wmt - -#undef vp8_variance_var8x16 -#define vp8_variance_var8x16 vp8_variance8x16_wmt - -#undef vp8_variance_var16x8 -#define vp8_variance_var16x8 vp8_variance16x8_wmt - -#undef vp8_variance_var16x16 -#define vp8_variance_var16x16 vp8_variance16x16_wmt - -#undef vp8_variance_subpixvar4x4 -#define vp8_variance_subpixvar4x4 vp8_sub_pixel_variance4x4_wmt - -#undef vp8_variance_subpixvar8x8 -#define vp8_variance_subpixvar8x8 vp8_sub_pixel_variance8x8_wmt - -#undef vp8_variance_subpixvar8x16 -#define vp8_variance_subpixvar8x16 vp8_sub_pixel_variance8x16_wmt - -#undef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_wmt - -#undef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt - -#undef vp8_variance_halfpixvar16x16_h -#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt - -#undef vp8_variance_halfpixvar16x16_v -#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt - -#undef vp8_variance_halfpixvar16x16_hv -#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt - -#undef vp8_variance_subpixmse16x16 -#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt - -#undef vp8_variance_getmbss -#define vp8_variance_getmbss vp8_get_mb_ss_sse2 - -#undef vp8_variance_mse16x16 -#define vp8_variance_mse16x16 vp8_mse16x16_wmt - -#if ARCH_X86_64 -#undef vp8_ssimpf_8x8 -#define vp8_ssimpf_8x8 vp8_ssim_parms_8x8_sse2 - -#undef vp8_ssimpf_16x16 -#define vp8_ssimpf_16x16 vp8_ssim_parms_16x16_sse2 -#endif - -#endif -#endif - - -#if HAVE_SSE3 -extern prototype_sad(vp8_sad16x16_sse3); -extern prototype_sad(vp8_sad16x8_sse3); -extern prototype_sad_multi_same_address(vp8_sad16x16x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad16x8x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad8x16x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad8x8x3_sse3); -extern prototype_sad_multi_same_address(vp8_sad4x4x3_sse3); - -extern prototype_sad_multi_dif_address(vp8_sad16x16x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad16x8x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad8x16x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad8x8x4d_sse3); -extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3); -extern prototype_sad(vp8_copy32xn_sse3); - -#if !CONFIG_RUNTIME_CPU_DETECT - -#undef vp8_variance_sad16x16 -#define vp8_variance_sad16x16 vp8_sad16x16_sse3 - -#undef vp8_variance_sad16x16x3 -#define vp8_variance_sad16x16x3 vp8_sad16x16x3_sse3 - -#undef vp8_variance_sad16x8x3 -#define vp8_variance_sad16x8x3 vp8_sad16x8x3_sse3 - -#undef vp8_variance_sad8x16x3 -#define vp8_variance_sad8x16x3 vp8_sad8x16x3_sse3 - -#undef vp8_variance_sad8x8x3 -#define vp8_variance_sad8x8x3 vp8_sad8x8x3_sse3 - -#undef vp8_variance_sad4x4x3 -#define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3 - -#undef vp8_variance_sad16x16x4d -#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3 - -#undef vp8_variance_sad16x8x4d -#define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3 - -#undef vp8_variance_sad8x16x4d -#define vp8_variance_sad8x16x4d vp8_sad8x16x4d_sse3 - -#undef vp8_variance_sad8x8x4d -#define vp8_variance_sad8x8x4d vp8_sad8x8x4d_sse3 - -#undef vp8_variance_sad4x4x4d -#define vp8_variance_sad4x4x4d vp8_sad4x4x4d_sse3 - -#undef vp8_variance_copy32xn -#define vp8_variance_copy32xn vp8_copy32xn_sse3 - -#endif -#endif - - -#if HAVE_SSSE3 -extern prototype_sad_multi_same_address(vp8_sad16x16x3_ssse3); -extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_ssse3); -extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_ssse3); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad16x16x3 -#define vp8_variance_sad16x16x3 vp8_sad16x16x3_ssse3 - -#undef vp8_variance_sad16x8x3 -#define vp8_variance_sad16x8x3 vp8_sad16x8x3_ssse3 - -#undef vp8_variance_subpixvar16x8 -#define vp8_variance_subpixvar16x8 vp8_sub_pixel_variance16x8_ssse3 - -#undef vp8_variance_subpixvar16x16 -#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_ssse3 - -#endif -#endif - - -#if HAVE_SSE4_1 -extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4); -extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4); - -#if !CONFIG_RUNTIME_CPU_DETECT -#undef vp8_variance_sad16x16x8 -#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4 - -#undef vp8_variance_sad16x8x8 -#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4 - -#undef vp8_variance_sad8x16x8 -#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4 - -#undef vp8_variance_sad8x8x8 -#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4 - -#undef vp8_variance_sad4x4x8 -#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4 - -#endif -#endif - -#endif diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c index 71c51c1..a169b49 100644 --- a/vp8/encoder/x86/x86_csystemdependent.c +++ b/vp8/encoder/x86/x86_csystemdependent.c @@ -90,31 +90,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { /* Override default functions with fastest ones for this CPU. */ #if HAVE_MMX if (flags & HAS_MMX) { - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_mmx; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_mmx; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_mmx; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_mmx; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_mmx; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_mmx; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_mmx; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_mmx; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_mmx; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_mmx; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_mmx; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_mmx; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx; - cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx; - cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx; - cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx; - cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_mmx; - cpi->rtcd.encodemb.berr = vp8_block_error_mmx; cpi->rtcd.encodemb.mberr = vp8_mbblock_error_mmx; cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_mmx; @@ -126,32 +101,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { #if HAVE_SSE2 if (flags & HAS_SSE2) { - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_wmt; - cpi->rtcd.variance.sad16x8 = vp8_sad16x8_wmt; - cpi->rtcd.variance.sad8x16 = vp8_sad8x16_wmt; - cpi->rtcd.variance.sad8x8 = vp8_sad8x8_wmt; - cpi->rtcd.variance.sad4x4 = vp8_sad4x4_wmt; - cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse2; - - cpi->rtcd.variance.var4x4 = vp8_variance4x4_wmt; - cpi->rtcd.variance.var8x8 = vp8_variance8x8_wmt; - cpi->rtcd.variance.var8x16 = vp8_variance8x16_wmt; - cpi->rtcd.variance.var16x8 = vp8_variance16x8_wmt; - cpi->rtcd.variance.var16x16 = vp8_variance16x16_wmt; - - cpi->rtcd.variance.subpixvar4x4 = vp8_sub_pixel_variance4x4_wmt; - cpi->rtcd.variance.subpixvar8x8 = vp8_sub_pixel_variance8x8_wmt; - cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt; - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt; - cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt; - cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt; - cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt; - cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt; - - cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt; - cpi->rtcd.variance.getmbss = vp8_get_mb_ss_sse2; - cpi->rtcd.encodemb.berr = vp8_block_error_xmm; cpi->rtcd.encodemb.mberr = vp8_mbblock_error_xmm; cpi->rtcd.encodemb.mbuverr = vp8_mbuverror_xmm; @@ -160,54 +109,20 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi) { cpi->rtcd.encodemb.submbuv = vp8_subtract_mbuv_sse2; cpi->rtcd.temporal.apply = vp8_temporal_filter_apply_sse2; -#if CONFIG_INTERNAL_STATS -#if ARCH_X86_64 - cpi->rtcd.variance.ssimpf_8x8 = vp8_ssim_parms_8x8_sse2; - cpi->rtcd.variance.ssimpf_16x16 = vp8_ssim_parms_16x16_sse2; -#endif -#endif } #endif #if HAVE_SSE3 if (flags & HAS_SSE3) { - cpi->rtcd.variance.sad16x16 = vp8_sad16x16_sse3; - cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_sse3; - cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_sse3; - cpi->rtcd.variance.sad8x16x3 = vp8_sad8x16x3_sse3; - cpi->rtcd.variance.sad8x8x3 = vp8_sad8x8x3_sse3; - cpi->rtcd.variance.sad4x4x3 = vp8_sad4x4x3_sse3; cpi->rtcd.search.full_search = vp8_full_search_sadx3; - cpi->rtcd.variance.sad16x16x4d = vp8_sad16x16x4d_sse3; - cpi->rtcd.variance.sad16x8x4d = vp8_sad16x8x4d_sse3; - cpi->rtcd.variance.sad8x16x4d = vp8_sad8x16x4d_sse3; - cpi->rtcd.variance.sad8x8x4d = vp8_sad8x8x4d_sse3; - cpi->rtcd.variance.sad4x4x4d = vp8_sad4x4x4d_sse3; - cpi->rtcd.variance.copy32xn = vp8_copy32xn_sse3; cpi->rtcd.search.diamond_search = vp8_diamond_search_sadx4; cpi->rtcd.search.refining_search = vp8_refining_search_sadx4; } #endif -#if HAVE_SSSE3 - if (flags & HAS_SSSE3) { - cpi->rtcd.variance.sad16x16x3 = vp8_sad16x16x3_ssse3; - cpi->rtcd.variance.sad16x8x3 = vp8_sad16x8x3_ssse3; - - cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_ssse3; - cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_ssse3; - } -#endif - - #if HAVE_SSE4_1 if (flags & HAS_SSE4_1) { - cpi->rtcd.variance.sad16x16x8 = vp8_sad16x16x8_sse4; - cpi->rtcd.variance.sad16x8x8 = vp8_sad16x8x8_sse4; - cpi->rtcd.variance.sad8x16x8 = vp8_sad8x16x8_sse4; - cpi->rtcd.variance.sad8x8x8 = vp8_sad8x8x8_sse4; - cpi->rtcd.variance.sad4x4x8 = vp8_sad4x4x8_sse4; cpi->rtcd.search.full_search = vp8_full_search_sadx8; } #endif diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk index 7058e31..6d2f180 100644 --- a/vp8/vp8cx.mk +++ b/vp8/vp8cx.mk @@ -92,7 +92,6 @@ VP8_CX_SRCS-yes += encoder/mbgraph.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h -VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/temporal_filter_x86.h VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c