From 0971d3204d26abe4d86fc873af44ab2286bc9c62 Mon Sep 17 00:00:00 2001 From: Angie Chiang Date: Mon, 22 Jul 2019 15:16:08 -0700 Subject: [PATCH] Reduce call num of exhaustive search The encoding time difference between non_greedy_mv and baseline is reduced from 51% to 13% However, there is also a performance impact. non_greedy_mv performance: Before this CL lowres 0.395% midres 0.716% hdres 0.533% After this CL lowres 0.242% midres 0.429% hdres 0.305% Change-Id: I047d6509df504b264981c0b903c0cc955f45b273 --- vp9/encoder/vp9_encoder.c | 5 ++--- vp9/encoder/vp9_mcomp.c | 34 +++++++++++++++++++++++++--------- vp9/encoder/vp9_mcomp.h | 5 ++--- vp9/encoder/vp9_rdopt.c | 12 ++++++------ 4 files changed, 35 insertions(+), 21 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 08d6d4d..868cd43 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -5937,9 +5937,8 @@ static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, (void)sadpb; nb_full_mv_num = vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx, bsize, nb_full_mvs); - vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1, - &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, - mv); + vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param, + lambda, 1, nb_full_mvs, nb_full_mv_num, mv); #else (void)frame_idx; (void)mi_row; diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 0e44bb4..b6e3090 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -2437,16 +2437,24 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, return best_sad; } +static int get_exhaustive_threshold(int exhaustive_searches_thresh, + BLOCK_SIZE bsize) { + return exhaustive_searches_thresh >> + (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); +} + #if CONFIG_NON_GREEDY_MV // Runs sequence of diamond searches in smaller steps for RD. 
/* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ -int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, - int step_param, int lambda, int do_refine, - const vp9_variance_fn_ptr_t *fn_ptr, +int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int lambda, int do_refine, const int_mv *nb_full_mvs, int full_mv_num, MV *best_mv) { + const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; + const SPEED_FEATURES *const sf = &cpi->sf; int n, num00 = 0; int thissme; int bestsme; @@ -2495,9 +2503,16 @@ int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *mvp_full, } } - full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, - nb_full_mvs, full_mv_num); - bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0); + if (sf->exhaustive_searches_thresh < INT_MAX && + !cpi->rc.is_src_frame_alt_ref) { + const int64_t exhaustive_thr = + get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); + if (bestsme > exhaustive_thr) { + full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, + nb_full_mvs, full_mv_num); + bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0); + } + } return bestsme; } #endif // CONFIG_NON_GREEDY_MV @@ -2886,9 +2901,10 @@ int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x, if (sf->exhaustive_searches_thresh < INT_MAX && !cpi->rc.is_src_frame_alt_ref) { const int64_t exhaustive_thr = - sf->exhaustive_searches_thresh >> - (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); - if (var > exhaustive_thr) run_exhaustive_search = 1; + get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); + if (var > exhaustive_thr) { + run_exhaustive_search = 1; + } } } else if (method == MESH) { run_exhaustive_search = 1; diff --git a/vp9/encoder/vp9_mcomp.h 
b/vp9/encoder/vp9_mcomp.h index 424ca62..6f46041 100644 --- a/vp9/encoder/vp9_mcomp.h +++ b/vp9/encoder/vp9_mcomp.h @@ -136,9 +136,8 @@ int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, const int_mv *nb_full_mvs, int full_mv_num); int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int lambda, - int do_refine, - const vp9_variance_fn_ptr_t *fn_ptr, + BLOCK_SIZE bsize, MV *mvp_full, int step_param, + int lambda, int do_refine, const int_mv *nb_full_mvs, int full_mv_num, MV *best_mv); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 65b9435..e243e03 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2579,9 +2579,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV - bestsme = vp9_full_pixel_diamond_new(cpi, x, &mvp_full, step_param, lambda, 1, - &cpi->fn_ptr[bsize], nb_full_mvs, - nb_full_mv_num, &tmp_mv->as_mv); + bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param, + lambda, 1, nb_full_mvs, nb_full_mv_num, + &tmp_mv->as_mv); #else // CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, @@ -2617,9 +2617,9 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_diamond_new( - cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), - lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, - &this_mv); + cpi, x, bsize, &mvp_full, + VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs, + nb_full_mv_num, &this_mv); #else // CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, -- 2.7.4