Early termination in encoding partition search
authorYunqing Wang <yunqingwang@google.com>
Fri, 15 Aug 2014 00:25:21 +0000 (17:25 -0700)
committerYunqing Wang <yunqingwang@google.com>
Thu, 28 Aug 2014 18:27:28 +0000 (11:27 -0700)
In the partition search, the encoder checks all possible
partitionings in the superblock's partition search tree.
This patch proposed a set of criteria for partition search
early termination, which effectively decided whether or
not to terminate the search in current branch based on the
"skippable" result of the quantized transform coefficients.
The "skippable" information was gathered during the
partition mode search, and no overhead calculations were
introduced.

This patch gives significant encoding speed gains without
sacrificing the quality.

Borg test results:
1. At speed 1,
   stdhd set: psnr: +0.074%, ssim: +0.093%;
   derf set:  psnr: -0.024%, ssim: +0.011%;
2. At speed 2,
   stdhd set: psnr: +0.033%, ssim: +0.100%;
   derf set:  psnr: -0.062%, ssim: +0.003%;
3. At speed 3,
   stdhd set: psnr: +0.060%, ssim: +0.190%;
   derf set:  psnr: -0.064%, ssim: -0.002%;
4. At speed 4,
   stdhd set: psnr: +0.070%, ssim: +0.143%;
   derf set:  psnr: -0.104%, ssim: +0.039%;

The speedup ranges from several percent to 60+%.
                 speed1    speed2    speed3    speed4
(1080p, 100f):
old_town_cross:  48.2%     23.9%     20.8%     16.5%
park_joy:        11.4%     17.8%     29.4%     18.2%
pedestrian_area: 10.7%      4.0%      4.2%      2.4%
(720p, 200f):
mobcal:          68.1%     36.3%     34.4%     17.7%
parkrun:         15.8%     24.2%     37.1%     16.8%
shields:         45.1%     32.8%     30.1%      9.6%
(cif, 300f)
bus:              3.7%     10.4%     14.0%      7.9%
deadline:        13.6%     14.8%     12.6%     10.9%
mobile:           5.3%     11.5%     14.7%     10.7%

Change-Id: I246c38fb952ad762ce5e365711235b605f470a66

vp9/encoder/vp9_context_tree.h
vp9/encoder/vp9_encodeframe.c
vp9/encoder/vp9_rdopt.c
vp9/encoder/vp9_speed_features.c
vp9/encoder/vp9_speed_features.h

index d60e6c3..0cbb244 100644 (file)
@@ -34,6 +34,9 @@ typedef struct {
   int num_4x4_blk;
   int skip;
   int skip_txfm[MAX_MB_PLANE];
+  // For current partition, only if all Y, U, and V transform blocks'
+  // coefficients are quantized to 0, skippable is set to 0.
+  int skippable;
   int best_mode_index;
   int hybrid_pred_diff;
   int comp_pred_diff;
index 950a6c8..07134dc 100644 (file)
@@ -727,6 +727,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
     p[i].eobs = ctx->eobs_pbuf[i][0];
   }
   ctx->is_coded = 0;
+  ctx->skippable = 0;
   x->skip_recode = 0;
 
   // Set to zero to make sure we do not use the previous encoded frame stats
@@ -2158,8 +2159,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
       sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
 
       if (sum_rd < best_rd) {
-        int64_t stop_thresh = 4096;
-        int64_t stop_thresh_rd;
+        int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
+        int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;
 
         best_rate = this_rate;
         best_dist = this_dist;
@@ -2167,14 +2168,18 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
         if (bsize >= BLOCK_8X8)
           pc_tree->partitioning = PARTITION_NONE;
 
-        // Adjust threshold according to partition size.
-        stop_thresh >>= 8 - (b_width_log2(bsize) +
+        // Adjust dist breakout threshold according to the partition size.
+        dist_breakout_thr >>= 8 - (b_width_log2(bsize) +
             b_height_log2(bsize));
 
-        stop_thresh_rd = RDCOST(x->rdmult, x->rddiv, 0, stop_thresh);
-        // If obtained distortion is very small, choose current partition
-        // and stop splitting.
-        if (!x->e_mbd.lossless && best_rd < stop_thresh_rd) {
+        // If all y, u, v transform blocks in this partition are skippable, and
+        // the dist & rate are within the thresholds, the partition search is
+        // terminated for current branch of the partition search tree.
+        // The dist & rate thresholds are set to 0 at speed 0 to disable the
+        // early termination at that speed.
+        if (!x->e_mbd.lossless &&
+            (ctx->skippable && best_dist < dist_breakout_thr &&
+            best_rate < rate_breakout_thr)) {
           do_split = 0;
           do_rect = 0;
         }
index 2efa3db..f73006e 100644 (file)
@@ -1720,12 +1720,14 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
                          int mode_index,
                          int64_t comp_pred_diff[REFERENCE_MODES],
                          const int64_t tx_size_diff[TX_MODES],
-                         int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
+                         int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
+                         int skippable) {
   MACROBLOCKD *const xd = &x->e_mbd;
 
   // Take a snapshot of the coding context so it can be
   // restored if we decide to encode this way
   ctx->skip = x->skip;
+  ctx->skippable = skippable;
   ctx->best_mode_index = mode_index;
   ctx->mic = *xd->mi[0];
   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
@@ -2556,6 +2558,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
   MB_MODE_INFO best_mbmode;
+  int best_mode_skippable = 0;
   int mode_index, best_mode_index = -1;
   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
   vp9_prob comp_mode_p;
@@ -2963,6 +2966,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         best_rd = this_rd;
         best_mbmode = *mbmi;
         best_skip2 = this_skip2;
+        best_mode_skippable = skippable;
+
         if (!x->select_tx_size)
           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
@@ -3119,8 +3124,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
-  store_coding_context(x, ctx, best_mode_index,
-                       best_pred_diff, best_tx_diff, best_filter_diff);
+  store_coding_context(x, ctx, best_mode_index, best_pred_diff,
+                       best_tx_diff, best_filter_diff, best_mode_skippable);
 
   return best_rd;
 }
@@ -3225,7 +3230,7 @@ int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
   if (!x->select_tx_size)
     swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
   store_coding_context(x, ctx, THR_ZEROMV,
-                       best_pred_diff, best_tx_diff, best_filter_diff);
+                       best_pred_diff, best_tx_diff, best_filter_diff, 0);
 
   return this_rd;
 }
@@ -3830,7 +3835,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
 
   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
   store_coding_context(x, ctx, best_ref_index,
-                       best_pred_diff, best_tx_diff, best_filter_diff);
+                       best_pred_diff, best_tx_diff, best_filter_diff, 0);
 
   return best_rd;
 }
index 0afcde5..0bead48 100644 (file)
@@ -92,6 +92,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
 
     sf->tx_size_search_breakout = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 23);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 21);
+    sf->partition_search_breakout_rate_thr = 500;
   }
 
   if (speed >= 2) {
@@ -120,6 +126,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX;
     sf->use_lastframe_partitioning = LAST_FRAME_PARTITION_LOW_MOTION;
     sf->adjust_partitioning_from_last_frame = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 24);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 22);
+    sf->partition_search_breakout_rate_thr = 700;
   }
 
   if (speed >= 3) {
@@ -144,6 +156,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
     sf->adaptive_interp_filter_search = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 25);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 23);
+    sf->partition_search_breakout_rate_thr = 1000;
   }
 
   if (speed >= 4) {
@@ -158,6 +176,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->use_lp32x32fdct = 1;
     sf->use_fast_coef_updates = ONE_LOOP_REDUCED;
     sf->use_fast_coef_costing = 1;
+
+    if (MIN(cm->width, cm->height) >= 720)
+      sf->partition_search_breakout_dist_thr = (1 << 26);
+    else
+      sf->partition_search_breakout_dist_thr = (1 << 24);
+    sf->partition_search_breakout_rate_thr = 1500;
   }
 
   if (speed >= 5) {
@@ -411,6 +435,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->recode_tolerance = 25;
   sf->default_interp_filter = SWITCHABLE;
   sf->tx_size_search_breakout = 0;
+  sf->partition_search_breakout_dist_thr = 0;
+  sf->partition_search_breakout_rate_thr = 0;
 
   if (oxcf->mode == REALTIME)
     set_rt_speed_feature(cpi, sf, oxcf->speed, oxcf->content);
index 46eedc1..e31185e 100644 (file)
@@ -393,6 +393,10 @@ typedef struct SPEED_FEATURES {
 
   // mask for skip evaluation of certain interp_filter type.
   INTERP_FILTER_MASK interp_filter_search_mask;
+
+  // Partition search early breakout thresholds.
+  int64_t partition_search_breakout_dist_thr;
+  int partition_search_breakout_rate_thr;
 } SPEED_FEATURES;
 
 struct VP9_COMP;