From 7da6324cab32d8080f497c912107f94b661a866e Mon Sep 17 00:00:00 2001 From: Alex Converse Date: Wed, 20 Jan 2016 14:28:09 -0800 Subject: [PATCH] Short circuit flat blocks when coding screen content at realtime speed. In the inter mode search, blocks with zero source variance skip every inter mode except NEARESTMV and every intra mode except DC_PRED. 10% less encode latency for large frames using the chromium remoting_perftests. +0.313% BDRATE on the screencast set at speed -6. Change-Id: Ib97a39dd8bcdeab545509e0e02d78ce7033f8c63 --- vp9/encoder/vp9_pickmode.c | 20 ++++++++++++++++++++ vp9/encoder/vp9_rd.h | 9 +++++++++ vp9/encoder/vp9_rdopt.h | 9 --------- vp9/encoder/vp9_speed_features.c | 4 ++++ vp9/encoder/vp9_speed_features.h | 4 ++++ 5 files changed, 37 insertions(+), 9 deletions(-) diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 755323d..8f6f2a3 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1251,6 +1251,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mi->tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cm->tx_mode]); + if (sf->short_circuit_flat_blocks) { +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + x->source_variance = vp9_high_get_sby_perpixel_variance( + cpi, &x->plane[0].src, bsize, xd->bd); + else +#endif // CONFIG_VP9_HIGHBITDEPTH + x->source_variance = + vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); + } + #if CONFIG_VP9_TEMPORAL_DENOISING vp9_denoiser_reset_frame_stats(ctx); #endif @@ -1289,6 +1300,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (cpi->use_svc) this_mode = ref_mode_set_svc[idx].pred_mode; + if (sf->short_circuit_flat_blocks && x->source_variance == 0 && + this_mode != NEARESTMV) { + continue; + } + if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; @@ -1704,6 +1720,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, const PREDICTION_MODE this_mode = intra_mode_list[i]; THR_MODES mode_index = 
mode_idx[INTRA_FRAME][mode_offset(this_mode)]; int mode_rd_thresh = rd_threshes[mode_index]; + if (sf->short_circuit_flat_blocks && x->source_variance == 0 && + this_mode != DC_PRED) { + continue; + } if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 5e6e773..a92b14e 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -182,6 +182,15 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd, int vp9_get_intra_cost_penalty(int qindex, int qdelta, vpx_bit_depth_t bit_depth); +unsigned int vp9_get_sby_perpixel_variance(struct VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs); +#if CONFIG_VP9_HIGHBITDEPTH +unsigned int vp9_high_get_sby_perpixel_variance(struct VP9_COMP *cpi, + const struct buf_2d *ref, + BLOCK_SIZE bs, int bd); +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index 00ee55c..253e4a0 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -29,15 +29,6 @@ void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); -unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs); -#if CONFIG_VP9_HIGHBITDEPTH -unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs, int bd); -#endif - void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index c5f0bad..b4f20fc 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -401,6 +401,9 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V; } } + if (content == VP9E_CONTENT_SCREEN) { + sf->short_circuit_flat_blocks = 1; + } } 
if (speed >= 6) { @@ -534,6 +537,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { sf->recode_tolerance = 25; sf->default_interp_filter = SWITCHABLE; sf->simple_model_rd_from_var = 0; + sf->short_circuit_flat_blocks = 0; // Some speed-up features even for best quality as minimal impact on quality. sf->adaptive_rd_thresh = 1; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index ad7b64a..fa2f79d 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -438,6 +438,10 @@ typedef struct SPEED_FEATURES { // Fast approximation of vp9_model_rd_from_var_lapndz int simple_model_rd_from_var; + + // Skip a number of expensive mode evaluations for blocks with zero source + // variance. + int short_circuit_flat_blocks; } SPEED_FEATURES; struct VP9_COMP; -- 2.7.4