From eee904c9b91b62510169c52d5fc05580fd1f18c0 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 18 Sep 2014 13:37:20 -0700 Subject: [PATCH] Adaptive mode search scheduling This commit enables an adaptive mode search order scheduling scheme in the rate-distortion optimization. It changes the compression performance by -0.433% and -0.420% for derf and stdhd respectively. It provides speed improvement for speed 3: bus CIF 1000 kbps 24590 b/f, 35.513 dB, 7864 ms -> 24696 b/f, 35.491 dB, 7408 ms (6% speed-up) stockholm 720p 1000 kbps 8983 b/f, 35.078 dB, 65698 ms -> 8962 b/f, 35.054 dB, 60298 ms (8%) old_town_cross 720p 1000 kbps 11804 b/f, 35.666 dB, 62492 ms -> 11778 b/f, 35.609 dB, 56040 ms (10%) blue_sky 1080p 1500 kbps 57173 b/f, 36.179 dB, 77879 ms -> 57199 b/f, 36.131 dB, 69821 ms (10%) pedestrian_area 1080p 2000 kbps 74241 b/f, 41.105 dB, 144031 ms -> 74271 b/f, 41.091 dB, 133614 ms (8%) Change-Id: Iaad28cbc99399030fc5f9951eb5aa7fa633f320e --- vp9/encoder/vp9_encoder.c | 4 +++- vp9/encoder/vp9_rd.h | 12 +++++++---- vp9/encoder/vp9_rdopt.c | 46 +++++++++++++++++++++++++++++++--------- vp9/encoder/vp9_speed_features.c | 3 +++ vp9/encoder/vp9_speed_features.h | 2 ++ 5 files changed, 52 insertions(+), 15 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 7b7a6e9..107bed5 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -991,8 +991,10 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { // Default rd threshold factors for mode selection for (i = 0; i < BLOCK_SIZES; ++i) { - for (j = 0; j < MAX_MODES; ++j) + for (j = 0; j < MAX_MODES; ++j) { cpi->rd.thresh_freq_fact[i][j] = 32; + cpi->rd.mode_map[i][j] = j; + } } #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF)\ diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index eeb5e0f..5dcb2f8 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -51,6 +51,12 @@ typedef enum { THR_NEARMV, THR_NEARA, + THR_NEARG, + + THR_ZEROMV, + THR_ZEROG, + THR_ZEROA, + THR_COMP_NEARESTLA, THR_COMP_NEARESTGA, @@ -58,13 +64,9 @@ typedef enum { THR_COMP_NEARLA, THR_COMP_NEWLA, - THR_NEARG, THR_COMP_NEARGA, THR_COMP_NEWGA, - THR_ZEROMV, - THR_ZEROG, - THR_ZEROA, THR_COMP_ZEROLA, THR_COMP_ZEROGA, @@ -98,6 +100,8 @@ typedef struct RD_OPT { int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES]; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; + int mode_map[BLOCK_SIZES][MAX_MODES]; + int64_t comp_pred_diff[REFERENCE_MODES]; int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; int64_t tx_select_diff[TX_MODES]; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 37832b7..81f6878 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -91,6 +91,12 @@ static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARMV, {LAST_FRAME, NONE}}, {NEARMV, {ALTREF_FRAME, NONE}}, + {NEARMV, {GOLDEN_FRAME, NONE}}, + + {ZEROMV, {LAST_FRAME, NONE}}, + {ZEROMV, {GOLDEN_FRAME, NONE}}, + {ZEROMV, {ALTREF_FRAME, NONE}}, + {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}}, {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}}, @@ -98,13 +104,9 @@ static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = { {NEARMV, {LAST_FRAME, ALTREF_FRAME}}, {NEWMV, {LAST_FRAME, ALTREF_FRAME}}, - {NEARMV, {GOLDEN_FRAME, NONE}}, {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}}, {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}}, - {ZEROMV, {LAST_FRAME, NONE}}, - {ZEROMV, {GOLDEN_FRAME, NONE}}, - {ZEROMV, {ALTREF_FRAME, NONE}}, {ZEROMV, {LAST_FRAME, ALTREF_FRAME}}, {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}}, @@ -2572,7 +2574,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MB_MODE_INFO best_mbmode; int best_mode_skippable = 0; - int mode_index, best_mode_index = -1; + int midx, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vp9_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; @@ -2590,8 +2592,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, int mode_skip_start = cpi->sf.mode_skip_start + 1; const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize]; + int mode_threshold[MAX_MODES]; + int *mode_map = rd_opt->mode_map[bsize]; const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags; vp9_zero(best_mbmode); + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, @@ -2686,7 +2691,25 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, mode_skip_mask[INTRA_FRAME] |= ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]); - for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { + for (i = 0; i < MAX_MODES; ++i) + mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; + + midx = cpi->sf.schedule_mode_search ? mode_skip_start : 0; + while (midx > 4) { + uint8_t end_pos = 0; + for (i = 5; i < midx; ++i) { + if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) { + uint8_t tmp = mode_map[i]; + mode_map[i] = mode_map[i - 1]; + mode_map[i - 1] = tmp; + end_pos = i; + } + } + midx = end_pos; + } + + for (midx = 0; midx < MAX_MODES; ++midx) { + int mode_index = mode_map[midx]; int mode_excluded = 0; int64_t this_rd = INT64_MAX; int disable_skip = 0; @@ -2706,7 +2729,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. - if (mode_index == mode_skip_start && best_mode_index >= 0) { + if (midx == mode_skip_start && best_mode_index >= 0) { switch (best_mbmode.ref_frame[0]) { case INTRA_FRAME: break; @@ -2736,8 +2759,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, continue; // Test best rd so far against threshold for trying this mode. - if (rd_less_than_thresh(best_rd, rd_threshes[mode_index], - rd_thresh_freq_fact[mode_index])) + if (best_mode_skippable && cpi->sf.schedule_mode_search) + mode_threshold[mode_index] <<= 1; + + if (best_rd < mode_threshold[mode_index]) continue; if (cpi->sf.motion_field_mode_search) { @@ -3129,7 +3154,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); - update_rd_thresh_fact(cpi, bsize, best_mode_index); + if (!cpi->rc.is_src_frame_alt_ref) + update_rd_thresh_fact(cpi, bsize, best_mode_index); // macroblock modes *mbmi = best_mbmode; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 7ddeac7..52e9a8e 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -95,9 +95,11 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, : USE_LARGESTALL; if (MIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = DISABLE_ALL_SPLIT; + sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; } else { sf->max_intra_bsize = BLOCK_32X32; sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; + sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0; } sf->adaptive_pred_interp_filter = 0; sf->adaptive_mode_search = 1; @@ -376,6 +378,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_fast_coef_updates = TWO_LOOP; sf->use_fast_coef_costing = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set + sf->schedule_mode_search = 0; sf->use_nonrd_pick_mode = 0; for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index a56d7de..ed84008 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -318,6 +318,8 @@ typedef struct SPEED_FEATURES { // point for this motion search and limits the search range around it. int adaptive_motion_search; + int schedule_mode_search; + // Allows sub 8x8 modes to use the prediction filter that was determined // best for 8x8 mode. If set to 0 we always re check all the filters for // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter -- 2.7.4