From c5586bfa849475825e248588da2adafeb6be1cb8 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Fri, 5 Oct 2018 10:42:48 -0700 Subject: [PATCH] Changes to facilitate accurate sub-pel motion search This patch includes changes to facilitate accurate sub-pel motion search. More patches will follow to turn on accurate sub-pel motion search. Change-Id: I224c28c338353fe5c7609372162f79885c54248f --- vp9/encoder/vp9_encoder.c | 5 +++-- vp9/encoder/vp9_mbgraph.c | 3 ++- vp9/encoder/vp9_mcomp.c | 37 ++++++++++++++++++++++--------------- vp9/encoder/vp9_mcomp.h | 2 +- vp9/encoder/vp9_pickmode.c | 9 ++++++--- vp9/encoder/vp9_rdopt.c | 12 +++++++++--- vp9/encoder/vp9_speed_features.c | 3 +++ vp9/encoder/vp9_speed_features.h | 10 ++++++++++ vp9/encoder/vp9_temporal_filter.c | 5 +++-- 9 files changed, 59 insertions(+), 27 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 773ef10..52dd89a 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -5544,12 +5544,13 @@ uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, /* restore UMV window */ x->mv_limits = tmp_mv_limits; + // TODO(yunqing): may use higher tap interp filter than 2 taps. 
// Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, - cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, + USE_2_TAPS); return bestsme; } diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index 2ec048b..831c79c 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -57,11 +57,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, { uint32_t distortion; uint32_t sse; + // TODO(yunqing): may use higher tap interp filter than 2 taps if needed. cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + 0, USE_2_TAPS); } xd->mi[0]->mode = NEWMV; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 995c54f..ed77d0e 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -367,14 +367,12 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { *ir = (int)divide_and_round(x1 * b, y1); } -uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, - const MV *ref_mv, int allow_hp, - int error_per_bit, - const vp9_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - uint32_t *distortion, uint32_t *sse1, - const uint8_t *second_pred, int w, int h) { +uint32_t vp9_skip_sub_pixel_tree( + const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, + int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, + int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], + uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, 
int w, + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, @@ -397,6 +395,7 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv, (void)sse; (void)thismse; (void)cost_list; + (void)use_accurate_subpel_search; return besterr; } @@ -406,7 +405,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, @@ -418,6 +417,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( (void)allow_hp; (void)forced_stop; (void)hstep; + (void)use_accurate_subpel_search; if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && @@ -471,8 +471,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; + (void)use_accurate_subpel_search; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); @@ -531,8 +533,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int 
use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; + (void)use_accurate_subpel_search; + besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); @@ -622,7 +626,7 @@ uint32_t vp9_find_best_sub_pixel_tree( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { const uint8_t *const z = x->plane[0].src.buf; const uint8_t *const src_address = z; const int src_stride = x->plane[0].src.stride; @@ -650,6 +654,8 @@ uint32_t vp9_find_best_sub_pixel_tree( int kr, kc; MvLimits subpel_mv_limits; + (void)use_accurate_subpel_search; + vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); minc = subpel_mv_limits.col_min; maxc = subpel_mv_limits.col_max; @@ -2587,7 +2593,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, (void)tc; \ (void)sse; \ (void)thismse; \ - (void)cost_list; + (void)cost_list; \ + (void)use_accurate_subpel_search; // Return the maximum MV. 
uint32_t vp9_return_max_sub_pixel_mv( @@ -2595,7 +2602,7 @@ uint32_t vp9_return_max_sub_pixel_mv( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)minr; @@ -2617,7 +2624,7 @@ uint32_t vp9_return_min_sub_pixel_mv( int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h) { + int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)maxr; diff --git a/vp9/encoder/vp9_mcomp.h b/vp9/encoder/vp9_mcomp.h index adb02bc..6bd85a1 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -75,7 +75,7 @@ typedef uint32_t(fractional_mv_step_fp)( int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, - int h); + int h, int use_accurate_subpel_search); extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 416d437..8dce4cf 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -247,7 +247,8 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, 
x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } @@ -1539,7 +1540,8 @@ static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); } else if (svc->use_base_mv && svc->spatial_layer_id) { if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) { const int pre_stride = xd->plane[0].pre[0].stride; @@ -2758,7 +2760,8 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dummy_dist, - &x->pred_sse[ref_frame], NULL, 0, 0); + &x->pred_sse[ref_frame], NULL, 0, 0, + cpi->sf.use_accurate_subpel_search); xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv; } else { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 698faa3..9cde479 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1821,7 +1821,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, pw, ph); + &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search); } // Restore the pointer to the first (possibly scaled) prediction buffer. 
@@ -1875,6 +1875,8 @@ static int64_t rd_pick_best_sub8x8_mode( const BLOCK_SIZE bsize = mi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; + const int pw = num_4x4_blocks_wide << 2; + const int ph = num_4x4_blocks_high << 2; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; SPEED_FEATURES *const sf = &cpi->sf; @@ -2011,7 +2013,8 @@ static int64_t rd_pick_best_sub8x8_mode( x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop, sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, - &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0); + &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph, + cpi->sf.use_accurate_subpel_search); // save motion search result for use in compound prediction seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv; @@ -2330,6 +2333,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, const int best_predmv_idx = x->mv_best_ref_index[ref]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); + const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; + const int ph = num_4x4_blocks_high_lookup[bsize] << 2; MV pred_mv[3]; pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; @@ -2452,7 +2457,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, + cpi->sf.use_accurate_subpel_search); } *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 4490923..49a907d 100644 --- 
a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -288,6 +288,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23) : INT_MAX; + sf->use_accurate_subpel_search = USE_2_TAPS; } if (speed >= 2) { @@ -450,6 +451,7 @@ static void set_rt_speed_feature_framesize_independent( sf->disable_golden_ref = 0; sf->enable_tpl_model = 0; sf->enhanced_full_pixel_motion_search = 0; + sf->use_accurate_subpel_search = USE_2_TAPS; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -942,6 +944,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->ml_prune_rect_partition_threhold[2] = -1; sf->ml_prune_rect_partition_threhold[3] = -1; sf->ml_var_partition_pruning = 0; + sf->use_accurate_subpel_search = USE_8_TAPS; // Some speed-up features even for best quality as minimal impact on quality. sf->adaptive_rd_thresh = 1; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index a895ed2..0067bb4 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -243,6 +243,12 @@ typedef enum { RE_ENCODE_MAXQ = 2 } OVERSHOOT_DETECTION_CBR_RT; +typedef enum { + USE_2_TAPS = 0, + USE_4_TAPS, + USE_8_TAPS, +} SUBPEL_SEARCH_TYPE; + typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -586,6 +592,10 @@ typedef struct SPEED_FEATURES { // Allow for disabling golden reference. int disable_golden_ref; + + // Allow sub-pixel search to use interpolation filters with different taps in + // order to achieve accurate motion search result. 
+ SUBPEL_SEARCH_TYPE use_accurate_subpel_search; } SPEED_FEATURES; struct VP9_COMP; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 51668d0..7ac70c8 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -421,12 +421,13 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi, /* restore UMV window */ x->mv_limits = tmp_mv_limits; + // TODO(yunqing): may use higher tap interp filter than 2 taps if needed. // Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_search_level, - cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, - 0); + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, + USE_2_TAPS); // Restore input state x->plane[0].src = src; -- 2.7.4