From 219cdab676fa77532072798fe10627f729f05585 Mon Sep 17 00:00:00 2001 From: Marco Date: Tue, 17 Jan 2017 10:39:26 -0800 Subject: [PATCH] vp9: Add feature to use block source_sad for realtime mode. Only for speed >= 7, and affects skipping of intra modes. Threshold is set low for now, needs to be tuned. Small/no difference in metrics on rtc clips. Change-Id: If9bdbd43f08d1f80407cdd2e9e5e96780dcd2424 --- vp9/encoder/vp9_block.h | 2 ++ vp9/encoder/vp9_encodeframe.c | 8 +++++++- vp9/encoder/vp9_encoder.c | 5 ++++- vp9/encoder/vp9_encoder.h | 2 ++ vp9/encoder/vp9_pickmode.c | 3 ++- vp9/encoder/vp9_ratectrl.c | 8 ++++++-- vp9/encoder/vp9_speed_features.c | 9 +++++++++ 7 files changed, 32 insertions(+), 5 deletions(-) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 1ea5fdf..0d5075c 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -151,6 +151,8 @@ struct macroblock { uint8_t sb_is_skin; + uint8_t skip_low_source_sad; + // Used to save the status of whether a block has a low variance in // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for // 32x32, 9~24 for 16x16. diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 1247c6d..4b6c9b0 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -925,7 +925,12 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, int variance4x4downsample[16]; int segment_id; int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3); - + if (cpi->sf.use_source_sad && !is_key_frame) { + // The sb_offset2 is to make it consistent with the index in the function + // vp9_avg_source_sad() in vp9_ratectrl.c. + int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); + x->skip_low_source_sad = cpi->avg_source_sad_sb[sb_offset2] == 1 ? 1 : 0; + } set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); segment_id = xd->mi[0]->segment_id; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { @@ -3857,6 +3862,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, x->color_sensitivity[0] = 0; x->color_sensitivity[1] = 0; x->sb_is_skin = 0; + x->skip_low_source_sad = 0; if (seg->enabled) { const uint8_t *const map = diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 934afc1..26326fc 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -462,6 +462,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->copied_frame_cnt); cpi->copied_frame_cnt = NULL; + vpx_free(cpi->avg_source_sad_sb); + cpi->avg_source_sad_sb = NULL; + vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; @@ -3156,7 +3159,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) || cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION || - cpi->noise_estimate.enabled)) + cpi->noise_estimate.enabled || cpi->sf.use_source_sad)) cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, &cpi->scaled_last_source, (cpi->oxcf.pass == 0)); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index cb10e50..00552e1 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -642,6 +642,8 @@ typedef struct VP9_COMP { uint8_t *copied_frame_cnt; uint8_t max_copied_frame; + uint8_t *avg_source_sad_sb; + LevelConstraint level_constraint; } VP9_COMP; diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index d5cace0..d146375 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1953,7 +1953,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) && perform_intra_pred && (best_rdc.rdcost == INT64_MAX || (!x->skip && best_rdc.rdcost > inter_mode_thresh && - bsize <= cpi->sf.max_intra_bsize))) { + bsize <= cpi->sf.max_intra_bsize)) && + !x->skip_low_source_sad) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; int i; TX_SIZE best_intra_tx_size = TX_SIZES; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 7834393..96f0af4 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -2242,6 +2242,7 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { const BLOCK_SIZE bsize = BLOCK_64X64; // Loop over sub-sample of frame, compute average sad over 64x64 blocks. uint64_t avg_sad = 0; + uint64_t tmp_sad = 0; int num_samples = 0; int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; @@ -2260,9 +2261,12 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) && ((sbi_row % 2 == 0 && sbi_col % 2 == 0) || (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) { + tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, + last_src_ystride); + if (cpi->sf.use_source_sad) + cpi->avg_source_sad_sb[num_samples] = tmp_sad < 5000 ? 1 : 0; + avg_sad += tmp_sad; num_samples++; - avg_sad += cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, - last_src_ystride); } src_y += 64; last_src_y += 64; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 934897d..9ce756e 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -494,6 +494,15 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, sf->mv.search_method = NSTEP; sf->mv.fullpel_search_step_param = 6; } + if (!cpi->use_svc && !cpi->resize_pending && !cpi->resize_state && + !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE) + sf->use_source_sad = 1; + if (sf->use_source_sad) { + if (cpi->avg_source_sad_sb == NULL) { + cpi->avg_source_sad_sb = (uint8_t *)vpx_calloc( + (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); + } + } } if (speed >= 8) { -- 2.7.4