From cdc359989a140cb2f25b81d768583e23d1a9ad85 Mon Sep 17 00:00:00 2001 From: paulwilkins Date: Wed, 23 Sep 2015 11:13:52 +0100 Subject: [PATCH] Changes to partition breakout rules. Changes to the breakout behavior for partition selection. The biggest impact is on speed 0 where encode speed in some cases more than doubles with typically less than 1% impact on quality. Speed 0 encode speed impact examples Animation test clip: +128% Park Joy: +59% Old Town Cross: +109% Change-Id: I222720657e56cede1b2a5539096f788ffb2df3a1 --- vp9/encoder/vp9_encodeframe.c | 60 +++++++++++++++++++++++++--------------- vp9/encoder/vp9_speed_features.c | 11 ++++++-- vp9/encoder/vp9_speed_features.h | 1 + 3 files changed, 47 insertions(+), 25 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 4a4301e..58fbc91 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2377,11 +2377,20 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, bsize >= BLOCK_8X8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; - (void) *tp_orig; + + int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr; + int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; + + (void)*tp_orig; assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); + // Adjust dist breakout threshold according to the partition size. 
+ dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + + b_height_log2_lookup[bsize]); + rate_breakout_thr *= num_pels_log2_lookup[bsize]; + vp9_rd_cost_init(&this_rdc); vp9_rd_cost_init(&sum_rdc); vp9_rd_cost_reset(&best_rdc); @@ -2410,9 +2419,13 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, force_vert_split); do_split &= bsize > min_size; } - if (cpi->sf.use_square_partition_only) { - partition_horz_allowed &= force_horz_split; - partition_vert_allowed &= force_vert_split; + + if (cpi->sf.use_square_partition_only && + (bsize > cpi->sf.use_square_only_threshold)) { + if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) + partition_horz_allowed &= force_horz_split; + if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) + partition_vert_allowed &= force_vert_split; } save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2489,27 +2502,17 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } if (this_rdc.rdcost < best_rdc.rdcost) { - int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr; - int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr; - best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - // Adjust dist breakout threshold according to the partition size. - dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + - b_height_log2_lookup[bsize]); - - rate_breakout_thr *= num_pels_log2_lookup[bsize]; - // If all y, u, v transform blocks in this partition are skippable, and // the dist & rate are within the thresholds, the partition search is // terminated for current branch of the partition search tree. - // The dist & rate thresholds are set to 0 at speed 0 to disable the - // early termination at that speed. 
- if (!x->e_mbd.lossless && - (ctx->skippable && best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr)) { + if (!x->e_mbd.lossless && ctx->skippable && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { do_split = 0; do_rect = 0; } @@ -2619,11 +2622,21 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_SPLIT; + + // Rate and distortion based partition search termination clause. + if (!x->e_mbd.lossless && + ((best_rdc.dist < (dist_breakout_thr >> 2)) || + (best_rdc.dist < dist_breakout_thr && + best_rdc.rate < rate_breakout_thr))) { + do_rect = 0; + } } } else { // skip rectangular partition test when larger block size // gives better rd cost - if (cpi->sf.less_rectangular_check) + if ((cpi->sf.less_rectangular_check) && + ((bsize > cpi->sf.use_square_only_threshold) || + (best_rdc.dist < dist_breakout_thr))) do_rect &= !partition_none_allowed; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2632,7 +2645,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { - subsize = get_subsize(bsize, PARTITION_HORZ); + subsize = get_subsize(bsize, PARTITION_HORZ); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && @@ -2673,6 +2686,10 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; + + if ((cpi->sf.less_rectangular_check) && + (bsize > cpi->sf.use_square_only_threshold)) + do_rect = 0; } } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); @@ -2680,7 +2697,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // PARTITION_VERT if 
(partition_vert_allowed && (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { - subsize = get_subsize(bsize, PARTITION_VERT); + subsize = get_subsize(bsize, PARTITION_VERT); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); @@ -2734,7 +2751,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, (void) best_rd; *rd_cost = best_rdc; - if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && pc_tree->index != 3) { int output_enabled = (bsize == BLOCK_64X64); diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 06d8745..a539629 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -113,8 +113,14 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { const int boosted = frame_is_boosted(cpi); + sf->partition_search_breakout_dist_thr = (1 << 20); + sf->partition_search_breakout_rate_thr = 80; + sf->tx_size_search_breakout = 1; sf->adaptive_rd_thresh = 1; sf->allow_skip_recode = 1; + sf->less_rectangular_check = 1; + sf->use_square_partition_only = !frame_is_boosted(cpi); + sf->use_square_only_threshold = BLOCK_16X16; if (speed >= 1) { if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) || @@ -123,6 +129,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, } else { sf->use_square_partition_only = !frame_is_intra_only(cm); } + sf->use_square_only_threshold = BLOCK_4X4; sf->less_rectangular_check = 1; @@ -139,9 +146,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; - - sf->tx_size_search_breakout = 1; - sf->partition_search_breakout_rate_thr = 80; } if (speed >= 2) { @@ -471,6 +475,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->partition_search_type = SEARCH_PARTITION; sf->less_rectangular_check = 0; 
sf->use_square_partition_only = 0; + sf->use_square_only_threshold = BLOCK_SIZES; sf->auto_min_max_partition_size = NOT_IN_USE; sf->rd_auto_partition_min_limit = BLOCK_4X4; sf->default_max_partition_size = BLOCK_64X64; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 95038ce..575e98c 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -267,6 +267,7 @@ typedef struct SPEED_FEATURES { // Disable testing non square partitions. (eg 16x32) int use_square_partition_only; + BLOCK_SIZE use_square_only_threshold; // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. -- 2.7.4