From 9349a28e80b568495d56d6364d3c7ed1abb51188 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Tue, 28 Oct 2014 14:50:10 -0700 Subject: [PATCH] Enable mode search threshold update in non-RD coding mode Adaptively adjust the mode thresholds after each mode search round to skip checking less likely selected modes. Local tests indicate 5% - 10% speed-up in speed -5 and -6. Average coding performance loss is -1.055%. speed -5 vidyo1 720p 1000 kbps 16533 b/f, 40.851 dB, 12607 ms -> 16556 b/f, 40.796 dB, 11831 ms nik 720p 1000 kbps 33229 b/f, 39.127 dB, 11468 ms -> 33235 b/f, 39.131 dB, 10919 ms speed -6 vidyo1 720p 1000 kbps 16549 b/f, 40.268 dB, 10138 ms -> 16538 b/f, 40.212 dB, 8456 ms nik 720p 1000 kbps 33271 b/f, 38.433 dB, 7886 ms -> 33279 b/f, 38.416 dB, 7843 ms Change-Id: I2c2963f1ce4ed9c1cf233b5b2c880b682e1c1e8b --- vp9/encoder/vp9_pickmode.c | 22 ++++++++++++++-------- vp9/encoder/vp9_rd.c | 23 +++++++++++++++++++++++ vp9/encoder/vp9_rd.h | 8 ++++++++ vp9/encoder/vp9_rdopt.c | 32 +++----------------------------- vp9/encoder/vp9_speed_features.c | 7 +------ 5 files changed, 49 insertions(+), 43 deletions(-) diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 1e4c9b1..6928338 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -450,7 +450,8 @@ static void estimate_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, args->dist += dist; } -static const THR_MODES mode_idx[MAX_REF_FRAMES - 1][INTER_MODES] = { +static const THR_MODES mode_idx[MAX_REF_FRAMES][4] = { + {THR_DC, THR_H_PRED, THR_V_PRED}, {THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV}, {THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG}, {THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA}, @@ -558,7 +559,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, EIGHTTAP : cm->interp_filter; mbmi->segment_id = segment_id; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { + for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { PREDICTION_MODE this_mode; x->pred_mv_sad[ref_frame] = INT_MAX; frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; @@ -610,8 +611,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, continue; mode_rd_thresh = - rd_threshes[mode_idx[ref_frame - - LAST_FRAME][INTER_OFFSET(this_mode)]]; + rd_threshes[mode_idx[ref_frame][INTER_OFFSET(this_mode)]]; if (rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, rd_thresh_freq_fact[this_mode])) continue; @@ -757,10 +757,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (x->skip) break; } - // If the current reference frame is valid and we found a usable mode, - // we are done. - if (best_rdc.rdcost < INT64_MAX && ref_frame == GOLDEN_FRAME) - break; + + // Check that a prediction mode has been selected. + assert(best_rdc.rdcost < INT64_MAX); } // If best prediction is not in dst buf, then copy the prediction block from @@ -836,5 +835,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, pd->dst = orig_dst; } + if (is_inter_block(mbmi)) + vp9_update_rd_thresh_fact(cpi, tile_data, bsize, + mode_idx[ref_frame][INTER_OFFSET(mbmi->mode)]); + else + vp9_update_rd_thresh_fact(cpi, tile_data, bsize, + mode_idx[ref_frame][mbmi->mode]); + *rd_cost = best_rdc; } diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index d758430..587ffad 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -611,6 +611,29 @@ void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) { rd->thresh_mult_sub8x8[i] = INT_MAX; } +// TODO(jingning) Refactor this function. Use targeted smaller struct as inputs. +void vp9_update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data, + int bsize, int best_mode_index) { + if (cpi->sf.adaptive_rd_thresh > 0) { + const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; + int mode; + for (mode = 0; mode < top_mode; ++mode) { + const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4); + const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64); + BLOCK_SIZE bs; + for (bs = min_size; bs <= max_size; ++bs) { + int *const fact = &tile_data->thresh_freq_fact[bs][mode]; + if (mode == best_mode_index) { + *fact -= (*fact >> 4); + } else { + *fact = MIN(*fact + RD_THRESH_INC, + cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); + } + } + } + } +} + int vp9_get_intra_cost_penalty(int qindex, int qdelta, vpx_bit_depth_t bit_depth) { const int q = vp9_dc_quant(qindex, qdelta, bit_depth); diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 1aa5266..aecca0b 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -36,6 +36,9 @@ extern "C" { #define MAX_MODES 30 #define MAX_REFS 6 +#define RD_THRESH_MAX_FACT 64 +#define RD_THRESH_INC 1 + // This enumerator type needs to be kept aligned with the mode order in // const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code. typedef enum { @@ -129,6 +132,7 @@ void vp9_rd_cost_reset(RD_COST *rd_cost); void vp9_rd_cost_init(RD_COST *rd_cost); struct TileInfo; +struct TileDataEnc; struct VP9_COMP; struct macroblock; @@ -158,6 +162,10 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi); void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi); +void vp9_update_rd_thresh_fact(struct VP9_COMP *cpi, + struct TileDataEnc *tile_data, + int bsize, int best_mode_index); + static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, int thresh_fact) { return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 646cc89..795820b 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -38,9 +38,6 @@ #include "vp9/encoder/vp9_rdopt.h" #include "vp9/encoder/vp9_variance.h" -#define RD_THRESH_MAX_FACT 64 -#define RD_THRESH_INC 1 - #define LAST_FRAME_MODE_MASK ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \ (1 << INTRA_FRAME)) #define GOLDEN_FRAME_MODE_MASK ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \ @@ -2765,29 +2762,6 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); } -// TODO(jingning) Refactor this function. Use targeted smaller struct as inputs. -static void update_rd_thresh_fact(VP9_COMP *cpi, TileDataEnc *tile_data, - int bsize, int best_mode_index) { - if (cpi->sf.adaptive_rd_thresh > 0) { - const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES; - int mode; - for (mode = 0; mode < top_mode; ++mode) { - const BLOCK_SIZE min_size = MAX(bsize - 1, BLOCK_4X4); - const BLOCK_SIZE max_size = MIN(bsize + 2, BLOCK_64X64); - BLOCK_SIZE bs; - for (bs = min_size; bs <= max_size; ++bs) { - int *const fact = &tile_data->thresh_freq_fact[bs][mode]; - if (mode == best_mode_index) { - *fact -= (*fact >> 4); - } else { - *fact = MIN(*fact + RD_THRESH_INC, - cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); - } - } - } - } -} - void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, @@ -3423,7 +3397,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, !is_inter_block(&best_mbmode)); if (!cpi->rc.is_src_frame_alt_ref) - update_rd_thresh_fact(cpi, tile_data, bsize, best_mode_index); + vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_mode_index); // macroblock modes *mbmi = best_mbmode; @@ -3578,7 +3552,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, assert((cm->interp_filter == SWITCHABLE) || (cm->interp_filter == mbmi->interp_filter)); - update_rd_thresh_fact(cpi, tile_data, bsize, THR_ZEROMV); + vp9_update_rd_thresh_fact(cpi, tile_data, bsize, THR_ZEROMV); vp9_zero(best_pred_diff); vp9_zero(best_filter_diff); @@ -4153,7 +4127,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); - update_rd_thresh_fact(cpi, tile_data, bsize, best_ref_index); + vp9_update_rd_thresh_fact(cpi, tile_data, bsize, best_ref_index); // macroblock modes *mbmi = best_mbmode; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index e148bf9..294d5cc 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -269,7 +269,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; - + sf->adaptive_rd_thresh = 2; // This feature is only enabled when partition search is disabled. sf->reuse_inter_pred_sby = 1; @@ -292,12 +292,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->partition_search_type = VAR_BASED_PARTITION; sf->search_type_check_frequency = 50; sf->mv.search_method = NSTEP; - sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; - - // Increase mode checking threshold for NEWMV. - sf->elevate_newmv_thresh = 1000; - sf->mv.reduce_first_step_size = 1; } -- 2.7.4