From e7b58b69fd91a4288453c7c7003e1fc4cc48bb93 Mon Sep 17 00:00:00 2001
From: Neeraj Gadgil
Date: Wed, 19 Apr 2023 08:13:26 +0530
Subject: [PATCH] Reduce joint motion search iters based on bsize

Joint motion search during compound mode eval is optimized by
reducing the number of mv search iterations based on bsize.
The sf 'comp_inter_joint_search_thresh' is renamed to
'comp_inter_joint_search_iter_level' and now selects the
iteration count.

cpu  Testset  Instr. Cnt          BD Rate loss (%)
              Red (%)      avg. psnr  ovr.psnr  ssim
 0   LOWRES2  5.373        0.0917     0.1088    0.0294
 0   MIDRES2  3.395        0.0239     0.0520    0.0783
 0   HDRES2   2.291        0.0223     0.0301    0.0053
 0   Average  3.686        0.0460     0.0636    0.0377

STATS_CHANGED

Change-Id: I7ee8873ebc8af967382324ae8f5c70c26665d5e6
---
 vp9/encoder/vp9_rdopt.c          | 40 +++++++++++++++++++++++++++++++---------
 vp9/encoder/vp9_speed_features.c |  7 ++++---
 vp9/encoder/vp9_speed_features.h | 19 ++++++++++++++-----
 3 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index c68cfef..f051c62 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1898,11 +1898,22 @@ static INLINE int skip_single_mode_based_on_mode_rate( return 0; } -#define NUM_ITERS 4 +#define MAX_JOINT_MV_SEARCH_ITERS 4 +static INLINE int get_joint_search_iters(int sf_level, BLOCK_SIZE bsize) { + int num_iters = MAX_JOINT_MV_SEARCH_ITERS; // sf_level = 0 + if (sf_level >= 2) + num_iters = 0; + else if (sf_level >= 1) + num_iters = bsize < BLOCK_8X8 + ? 0 + : (bsize <= BLOCK_16X16 ? 
2 : MAX_JOINT_MV_SEARCH_ITERS); + return num_iters; +} + static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], - int *rate_mv) { + int *rate_mv, int num_iters) { const VP9_COMMON *const cm = &cpi->common; const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; @@ -1911,7 +1922,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, const int refs[2] = { mi->ref_frame[0], mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] }; int_mv ref_mv[2]; - int_mv iter_mvs[NUM_ITERS][2]; + int_mv iter_mvs[MAX_JOINT_MV_SEARCH_ITERS][2]; int ite, ref; const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; struct scale_factors sf; @@ -1932,6 +1943,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH + // Check number of iterations do not exceed the max + assert(num_iters <= MAX_JOINT_MV_SEARCH_ITERS); + for (ref = 0; ref < 2; ++ref) { ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0]; @@ -1962,7 +1976,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, // Allow joint search multiple times iteratively for each reference frame // and break out of the search loop if it couldn't find a better mv. 
- for (ite = 0; ite < NUM_ITERS; ite++) { + for (ite = 0; ite < num_iters; ite++) { struct buf_2d ref_yv12[2]; uint32_t bestsme = UINT_MAX; int sadpb = x->sadperbit16; @@ -2044,7 +2058,7 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, } else { break; } - if (ite < NUM_ITERS - 1) { + if (ite < num_iters - 1) { iter_mvs[ite + 1][0].as_int = frame_mv[refs[0]].as_int; iter_mvs[ite + 1][1].as_int = frame_mv[refs[1]].as_int; } @@ -2250,12 +2264,16 @@ static int64_t rd_pick_best_sub8x8_mode( if (has_second_rf && this_mode == NEWMV && mi->interp_filter == EIGHTTAP) { + // Decide number of joint motion search iterations + const int num_joint_search_iters = get_joint_search_iters( + cpi->sf.comp_inter_joint_search_iter_level, bsize); // adjust src pointers mi_buf_shift(x, block); - if (sf->comp_inter_joint_search_thresh <= bsize) { + if (num_joint_search_iters) { int rate_mv; joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, - mi_col, seg_mvs[block], &rate_mv); + mi_col, seg_mvs[block], &rate_mv, + num_joint_search_iters); seg_mvs[block][mi->ref_frame[0]].as_int = frame_mv[this_mode][mi->ref_frame[0]].as_int; seg_mvs[block][mi->ref_frame[1]].as_int = @@ -2878,16 +2896,20 @@ static int64_t handle_inter_mode( if (this_mode == NEWMV) { int rate_mv; if (is_comp_pred) { + // Decide number of joint motion search iterations + const int num_joint_search_iters = get_joint_search_iters( + cpi->sf.comp_inter_joint_search_iter_level, bsize); + // Initialize mv using single prediction mode result. 
frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + if (num_joint_search_iters) { #if CONFIG_COLLECT_COMPONENT_TIMING start_timing(cpi, joint_motion_search_time); #endif joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, - single_newmv, &rate_mv); + single_newmv, &rate_mv, num_joint_search_iters); #if CONFIG_COLLECT_COMPONENT_TIMING end_timing(cpi, joint_motion_search_time); #endif diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 04804da..60720e3 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -244,6 +244,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, sf->trellis_opt_tx_rd.thresh = boosted ? 4.0 : 3.0; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + sf->comp_inter_joint_search_iter_level = 1; // Reference masking is not supported in dynamic scaling mode. 
sf->reference_masking = oxcf->resize_mode != RESIZE_DYNAMIC; @@ -331,7 +332,7 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->comp_inter_joint_search_iter_level = 2; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->recode_tolerance_high = 45; sf->enhanced_full_pixel_motion_search = 0; @@ -530,7 +531,7 @@ static void set_rt_speed_feature_framesize_independent( } sf->disable_filter_search_var_thresh = 50; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES; + sf->comp_inter_joint_search_iter_level = 2; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; @@ -928,7 +929,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi, int speed) { sf->mv.auto_mv_step_size = 0; sf->mv.fullpel_search_step_param = 6; sf->mv.use_downsampled_sad = 0; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; + sf->comp_inter_joint_search_iter_level = 0; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 7cb3f35..70c61fe 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -286,11 +286,20 @@ typedef struct SPEED_FEATURES { // adds overhead. int static_segmentation; - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. 
- BLOCK_SIZE comp_inter_joint_search_thresh; + // The best compound predictor is found using an iterative log search process + // that searches for best ref0 mv using error of combined predictor and then + // searches for best ref1 mv. This sf determines the number of iterations of + // this process based on block size. The sf becomes more aggressive from level + // 0 to 2. The following table indicates the number of iterations w.r.t bsize: + // ----------------------------------------------- + // |sf (level)|bsize < 8X8| [8X8, 16X16] | > 16X16 | + // | 0 | 4 | 4 | 4 | + // | 1 | 0 | 2 | 4 | + // | 2 | 0 | 0 | 0 | + // ----------------------------------------------- + // Here, 0 iterations indicate using the best single motion vector selected + // for each ref frame without any iterative refinement. + int comp_inter_joint_search_iter_level; // This variable is used to cap the maximum number of times we skip testing a // mode to be evaluated. A high value means we will be faster. -- 2.7.4