From 7f2daa74a05beee77003ef4288eafb3e3db9a531 Mon Sep 17 00:00:00 2001 From: Marco Date: Mon, 13 Feb 2017 10:16:42 -0800 Subject: [PATCH] vp9: Incorporate source sum_diff into non-rd partition thresholds. Increase the variance partition thresholds for superblocks that have low sum-diff (from source analysis prior to encoding frame). Use it for now only for speed >= 7 or for denoising on. Small change on metrics for rtc set: less than ~0.1 avgPSNR decrease on RTC set, for both speed 7 and 8. Change-Id: I38325046ebd5f371f51d6e91233d68ff73561af1 --- vp9/encoder/vp9_denoiser.c | 11 ++++--- vp9/encoder/vp9_denoiser.h | 4 +-- vp9/encoder/vp9_encodeframe.c | 69 +++++++++++++++++++++++++++++----------- vp9/encoder/vp9_encodeframe.h | 3 +- vp9/encoder/vp9_encoder.c | 8 ++--- vp9/encoder/vp9_encoder.h | 10 +++++- vp9/encoder/vp9_ratectrl.c | 18 +++++++++-- vp9/encoder/vp9_speed_features.c | 4 +-- 8 files changed, 90 insertions(+), 37 deletions(-) diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index afc66d0..c16429c 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -565,12 +565,13 @@ void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) { } // Scale/increase the partition threshold for denoiser speed-up. -int64_t vp9_scale_part_thresh(int64_t threshold, - VP9_DENOISER_LEVEL noise_level) { - if (noise_level >= kDenLow) - return ((5 * threshold) >> 2); +int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, + int content_state) { + if ((content_state == kLowSadLowSumdiff) || + (content_state == kHighSadLowSumdiff) || noise_level == kDenHigh) + return (3 * threshold) >> 1; else - return threshold; + return (5 * threshold) >> 2; } // Scale/increase the ac skip threshold for denoiser speed-up. 
diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index a029339..0ec8622 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h @@ -97,8 +97,8 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser); void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level); -int64_t vp9_scale_part_thresh(int64_t threshold, - VP9_DENOISER_LEVEL noise_level); +int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, + int content_state); int64_t vp9_scale_acskip_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 215f8b8..23b164f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -461,16 +461,35 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, return 0; } +int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, int width, + int height, int content_state) { + if (speed >= 8) { + if (width <= 640 && height <= 480) + return (5 * threshold_base) >> 2; + else if ((content_state == kLowSadLowSumdiff) || + (content_state == kHighSadLowSumdiff)) + return (5 * threshold_base) >> 2; + } else if (speed == 7) { + if ((content_state == kLowSadLowSumdiff) || + (content_state == kHighSadLowSumdiff)) { + return (5 * threshold_base) >> 2; + } + } + return threshold_base; +} + // Set the variance split thresholds for following the block sizes: // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is // currently only used on key frame. -static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { +static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, + int content_state) { VP9_COMMON *const cm = &cpi->common; const int is_key_frame = (cm->frame_type == KEY_FRAME); const int threshold_multiplier = is_key_frame ? 
20 : 1; int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); + if (is_key_frame) { thresholds[0] = threshold_base; thresholds[1] = threshold_base >> 2; @@ -489,14 +508,18 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { threshold_base = (7 * threshold_base) >> 3; } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->denoiser.denoising_level >= kDenLow) + threshold_base = vp9_scale_part_thresh( + threshold_base, cpi->denoiser.denoising_level, content_state); + else threshold_base = - vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level); - else if (cpi->oxcf.speed >= 8 && cm->width <= 640 && cm->height <= 480) - threshold_base = (5 * threshold_base) >> 2; + scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width, + cm->height, content_state); #else - if (cpi->oxcf.speed >= 8 && cm->width <= 640 && cm->height <= 480) - threshold_base = (5 * threshold_base) >> 2; + // Increase base variance threshold based on content_state/sum_diff level. 
+ threshold_base = scale_part_thresh_sumdiff( + threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); #endif thresholds[0] = threshold_base; thresholds[2] = threshold_base << cpi->oxcf.speed; @@ -514,7 +537,8 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) { } } -void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) { +void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, + int content_state) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; const int is_key_frame = (cm->frame_type == KEY_FRAME); @@ -522,7 +546,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) { sf->partition_search_type != REFERENCE_PARTITION) { return; } else { - set_vbp_thresholds(cpi, cpi->vbp_thresholds, q); + set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); // The thresholds below are not changed locally. if (is_key_frame) { cpi->vbp_threshold_sad = 0; @@ -929,6 +953,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, int avg_16x16[4]; int64_t threshold_4x4avg; NOISE_LEVEL noise_level = kLow; + int content_state = 0; uint8_t *s; const uint8_t *d; int sp; @@ -956,27 +981,33 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); segment_id = xd->mi[0]->segment_id; - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - if (cyclic_refresh_segment_id_boosted(segment_id)) { - int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); - set_vbp_thresholds(cpi, thresholds, q); - } - } if (cpi->sf.use_source_sad && !is_key_frame) { // The sb_offset2 is to make it consistent with the index in the function // vp9_avg_source_sad() in vp9_ratectrl.c. int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); - x->skip_low_source_sad = cpi->avg_source_sad_sb[sb_offset2] == 1 ? 
1 : 0; - // If avg_source_sad is lower than the threshold, copy the partition without - // computing the y_sad. - if (cpi->avg_source_sad_sb[sb_offset2] && cpi->sf.copy_partition_flag && + content_state = cpi->content_state_sb[sb_offset2]; + x->skip_low_source_sad = (content_state == kLowSadLowSumdiff || + content_state == kLowSadHighSumdiff) + ? 1 + : 0; + // If source_sad is low copy the partition without computing the y_sad. + if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && copy_partitioning(cpi, x, mi_row, mi_col, segment_id, sb_offset)) { chroma_check(cpi, x, bsize, y_sad, is_key_frame); return 0; } } + if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { + if (cyclic_refresh_segment_id_boosted(segment_id)) { + int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); + set_vbp_thresholds(cpi, thresholds, q, content_state); + } + } else { + set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); + } + // For non keyframes, disable 4x4 average for low resolution when speed = 8 threshold_4x4avg = (cpi->oxcf.speed < 8) ? 
thresholds[1] << 1 : INT64_MAX; diff --git a/vp9/encoder/vp9_encodeframe.h b/vp9/encoder/vp9_encodeframe.h index 2b9b65d..cf5ae3d 100644 --- a/vp9/encoder/vp9_encodeframe.h +++ b/vp9/encoder/vp9_encodeframe.h @@ -42,7 +42,8 @@ void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col, int mi_row); -void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q); +void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q, + int content_state); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index c33da10..e07d2af 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -463,8 +463,8 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->copied_frame_cnt); cpi->copied_frame_cnt = NULL; - vpx_free(cpi->avg_source_sad_sb); - cpi->avg_source_sad_sb = NULL; + vpx_free(cpi->content_state_sb); + cpi->content_state_sb = NULL; vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; @@ -3221,7 +3221,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } vp9_set_quantizer(cm, q); - vp9_set_variance_partition_thresholds(cpi, q); + vp9_set_variance_partition_thresholds(cpi, q, 0); setup_frame(cpi); @@ -3264,7 +3264,7 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) { vpx_clear_system_state(); vp9_set_quantizer(cm, q); - vp9_set_variance_partition_thresholds(cpi, q); + vp9_set_variance_partition_thresholds(cpi, q, 0); suppress_active_map(cpi); // Turn-off cyclic refresh for re-encoded frame. 
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 6755126..2797ff7 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -131,6 +131,14 @@ typedef enum { RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec. } RESIZE_TYPE; +typedef enum { + kInvalid = 0, + kLowSadLowSumdiff = 1, + kLowSadHighSumdiff = 2, + kHighSadLowSumdiff = 3, + kHighSadHighSumdiff = 4, +} CONTENT_STATE_SB; + typedef struct VP9EncoderConfig { BITSTREAM_PROFILE profile; vpx_bit_depth_t bit_depth; // Codec bit-depth. @@ -697,7 +705,7 @@ typedef struct VP9_COMP { uint8_t *copied_frame_cnt; uint8_t max_copied_frame; - uint8_t *avg_source_sad_sb; + uint8_t *content_state_sb; LevelConstraint level_constraint; } VP9_COMP; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 906a3c4..6932c0c 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -15,6 +15,7 @@ #include #include +#include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" @@ -2290,9 +2291,20 @@ void vp9_avg_source_sad(VP9_COMP *cpi) { (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) { tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); - if (cpi->sf.use_source_sad) - cpi->avg_source_sad_sb[num_samples] = - tmp_sad < avg_source_sad_threshold ? 1 : 0; + if (cpi->sf.use_source_sad) { + unsigned int tmp_sse; + unsigned int tmp_variance = vpx_variance64x64( + src_y, src_ystride, last_src_y, last_src_ystride, &tmp_sse); + // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) + if (tmp_sad < avg_source_sad_threshold) + cpi->content_state_sb[num_samples] = + ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff + : kLowSadHighSumdiff; + else + cpi->content_state_sb[num_samples] = + ((tmp_sse - tmp_variance) < 25) ? 
kHighSadLowSumdiff + : kHighSadHighSumdiff; + } avg_sad += tmp_sad; num_samples++; } diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index d63f8db..f081381 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -498,8 +498,8 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, !cpi->external_resize && cpi->oxcf.resize_mode == RESIZE_NONE) sf->use_source_sad = 1; if (sf->use_source_sad) { - if (cpi->avg_source_sad_sb == NULL) { - cpi->avg_source_sad_sb = (uint8_t *)vpx_calloc( + if (cpi->content_state_sb == NULL) { + cpi->content_state_sb = (uint8_t *)vpx_calloc( (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); } } -- 2.7.4