Cleanup/enhancements of switchable filter search
author Deb Mukherjee <debargha@google.com>
Fri, 16 Aug 2013 20:51:00 +0000 (13:51 -0700)
committer Deb Mukherjee <debargha@google.com>
Tue, 20 Aug 2013 16:47:04 +0000 (09:47 -0700)
Cleans up the switchable filter search logic. Also adds a
speed feature - a source variance threshold - that disables the
filter search (falling back to EIGHTTAP) when the block's source
variance is lower than the threshold.

Results: derfraw300
threshold = 16, psnr -0.238%, 4-5% speedup (tested on football)
threshold = 32, psnr -0.381%, 8-9% speedup (tested on football)
threshold = 64, psnr -0.611%, 12-13% speedup (tested on football)
threshold = 96, psnr -0.804%, 16-17% speedup (tested on football)

Based on these results, the threshold is chosen as 16 for speed 1,
32 for speed 2, 64 for speed 3 and 96 for speed 4.

Change-Id: Ib630d39192773b1983d3d349b97973768e170c04

vp9/encoder/vp9_onyx_if.c
vp9/encoder/vp9_onyx_int.h
vp9/encoder/vp9_rdopt.c

index 44e9aa5..13b8c82 100644 (file)
@@ -716,7 +716,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->use_lastframe_partitioning = 0;
   sf->tx_size_search_method = USE_FULL_RD;
   sf->use_lp32x32fdct = 0;
-  sf->use_8tap_always = 0;
   sf->use_avoid_tested_higherror = 0;
   sf->reference_masking = 0;
   sf->skip_lots_of_modes = 0;
@@ -735,6 +734,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->disable_splitmv = 0;
   sf->mode_search_skip_flags = 0;
   sf->disable_split_var_thresh = 0;
+  sf->disable_filter_search_var_thresh = 0;
   sf->last_chroma_intra_mode = TM_PRED;
   sf->use_rd_breakout = 0;
   sf->skip_encode_sb = 0;
@@ -794,6 +794,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->auto_min_max_partition_size = 1;
         sf->auto_min_max_partition_interval = 1;
         sf->disable_split_var_thresh = 32;
+        sf->disable_filter_search_var_thresh = 16;
       }
       if (speed == 2) {
         sf->adjust_thresholds_by_speed = 1;
@@ -830,6 +831,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->disable_split_var_thresh = 64;
         sf->auto_min_max_partition_size = 1;
         sf->auto_min_max_partition_interval = 2;
+        sf->disable_filter_search_var_thresh = 32;
       }
       if (speed == 3) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -853,6 +855,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->search_method = BIGDIA;
         sf->subpel_iters_per_step = 1;
         sf->disable_split_var_thresh = 64;
+        sf->disable_filter_search_var_thresh = 64;
       }
       if (speed == 4) {
         sf->comp_inter_joint_search_thresh = BLOCK_SIZES;
@@ -880,6 +883,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
         sf->search_method = HEX;
         sf->subpel_iters_per_step = 1;
         sf->disable_split_var_thresh = 64;
+        sf->disable_filter_search_var_thresh = 96;
       }
       /*
       if (speed == 2) {
index de6f34c..1b0e5ce 100644 (file)
@@ -259,7 +259,6 @@ typedef struct {
   int use_lastframe_partitioning;
   TX_SIZE_SEARCH_METHOD tx_size_search_method;
   int use_lp32x32fdct;
-  int use_8tap_always;
   int use_avoid_tested_higherror;
   int skip_lots_of_modes;
   int adjust_thresholds_by_speed;
@@ -286,6 +285,9 @@ typedef struct {
   unsigned int mode_search_skip_flags;
   // A source variance threshold below which the split mode is disabled
   unsigned int disable_split_var_thresh;
+  // A source variance threshold below which filter search is disabled
+  // Choose a very large value (UINT_MAX) to use 8-tap always
+  unsigned int disable_filter_search_var_thresh;
   MB_PREDICTION_MODE last_chroma_intra_mode;
   int use_rd_breakout;
   int use_uv_intra_rd_estimate;
index 36a7722..be4ca93 100644 (file)
@@ -2625,7 +2625,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   int64_t this_rd = 0;
   DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
   int pred_exists = 0;
-  int interpolating_intpel_seen = 0;
   int intpel_mv;
   int64_t rd, best_rd = INT64_MAX;
   int best_needs_copy = 0;
@@ -2738,7 +2737,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
   pred_exists = 0;
-  interpolating_intpel_seen = 0;
   // Are all MVs integer pel for Y and UV
   intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
       (mbmi->mv[0].as_mv.col & 15) == 0;
@@ -2747,97 +2745,97 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         (mbmi->mv[1].as_mv.col & 15) == 0;
   // Search for best switchable filter by checking the variance of
   // pred error irrespective of whether the filter will be used
-  *best_filter = EIGHTTAP;
-  if (cpi->sf.use_8tap_always) {
+  if (cm->mcomp_filter_type != BILINEAR) {
     *best_filter = EIGHTTAP;
-    vp9_zero(cpi->rd_filter_cache);
-  } else {
-    int i, newbest;
-    int tmp_rate_sum = 0;
-    int64_t tmp_dist_sum = 0;
-
-    cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
-    for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
-      int j;
-      int64_t rs_rd;
-      const int is_intpel_interp = intpel_mv;
-      mbmi->interp_filter = i;
-      vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
-      rs = get_switchable_rate(x);
-      rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
-
-      if (interpolating_intpel_seen && is_intpel_interp) {
-        cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
-                                         tmp_rate_sum, tmp_dist_sum);
-        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
-            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
-                cpi->rd_filter_cache[i] + rs_rd);
-        rd = cpi->rd_filter_cache[i];
-        if (cm->mcomp_filter_type == SWITCHABLE)
-          rd += rs_rd;
-      } else {
-        int rate_sum = 0;
-        int64_t dist_sum = 0;
-        if ((cm->mcomp_filter_type == SWITCHABLE &&
-             (!i || best_needs_copy)) ||
-            (cm->mcomp_filter_type != SWITCHABLE &&
-             (cm->mcomp_filter_type == mbmi->interp_filter ||
-              (!interpolating_intpel_seen && is_intpel_interp)))) {
-          for (j = 0; j < MAX_MB_PLANE; j++) {
-            xd->plane[j].dst.buf = orig_dst[j];
-            xd->plane[j].dst.stride = orig_dst_stride[j];
-          }
+    if (x->source_variance <
+        cpi->sf.disable_filter_search_var_thresh) {
+      *best_filter = EIGHTTAP;
+      vp9_zero(cpi->rd_filter_cache);
+    } else {
+      int i, newbest;
+      int tmp_rate_sum = 0;
+      int64_t tmp_dist_sum = 0;
+
+      cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
+      for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) {
+        int j;
+        int64_t rs_rd;
+        mbmi->interp_filter = i;
+        vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
+        rs = get_switchable_rate(x);
+        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+
+        if (i > 0 && intpel_mv) {
+          cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
+                                           tmp_rate_sum, tmp_dist_sum);
+          cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
+              MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+                  cpi->rd_filter_cache[i] + rs_rd);
+          rd = cpi->rd_filter_cache[i];
+          if (cm->mcomp_filter_type == SWITCHABLE)
+            rd += rs_rd;
         } else {
-          for (j = 0; j < MAX_MB_PLANE; j++) {
-            xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
-            xd->plane[j].dst.stride = 64;
+          int rate_sum = 0;
+          int64_t dist_sum = 0;
+          if ((cm->mcomp_filter_type == SWITCHABLE &&
+               (!i || best_needs_copy)) ||
+              (cm->mcomp_filter_type != SWITCHABLE &&
+               (cm->mcomp_filter_type == mbmi->interp_filter ||
+                (i == 0 && intpel_mv)))) {
+            for (j = 0; j < MAX_MB_PLANE; j++) {
+              xd->plane[j].dst.buf = orig_dst[j];
+              xd->plane[j].dst.stride = orig_dst_stride[j];
+            }
+          } else {
+            for (j = 0; j < MAX_MB_PLANE; j++) {
+              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
+              xd->plane[j].dst.stride = 64;
+            }
+          }
+          vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
+          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
+          cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
+                                           rate_sum, dist_sum);
+          cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
+              MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+                  cpi->rd_filter_cache[i] + rs_rd);
+          rd = cpi->rd_filter_cache[i];
+          if (cm->mcomp_filter_type == SWITCHABLE)
+            rd += rs_rd;
+          if (i == 0 && intpel_mv) {
+            tmp_rate_sum = rate_sum;
+            tmp_dist_sum = dist_sum;
           }
         }
-        vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
-        model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
-        cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv,
-                                         rate_sum, dist_sum);
-        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
-            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
-                cpi->rd_filter_cache[i] + rs_rd);
-        rd = cpi->rd_filter_cache[i];
-        if (cm->mcomp_filter_type == SWITCHABLE)
-          rd += rs_rd;
-        if (!interpolating_intpel_seen && is_intpel_interp) {
-          tmp_rate_sum = rate_sum;
-          tmp_dist_sum = dist_sum;
-        }
-      }
-      if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
-        if (rd / 2 > ref_best_rd) {
-          for (i = 0; i < MAX_MB_PLANE; i++) {
-            xd->plane[i].dst.buf = orig_dst[i];
-            xd->plane[i].dst.stride = orig_dst_stride[i];
+        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
+          if (rd / 2 > ref_best_rd) {
+            for (i = 0; i < MAX_MB_PLANE; i++) {
+              xd->plane[i].dst.buf = orig_dst[i];
+              xd->plane[i].dst.stride = orig_dst_stride[i];
+            }
+            return INT64_MAX;
           }
-          return INT64_MAX;
         }
-      }
-      newbest = i == 0 || rd < best_rd;
-
-      if (newbest) {
-        best_rd = rd;
-        *best_filter = mbmi->interp_filter;
-        if (cm->mcomp_filter_type == SWITCHABLE && i &&
-            !(interpolating_intpel_seen && is_intpel_interp))
-          best_needs_copy = !best_needs_copy;
-      }
+        newbest = i == 0 || rd < best_rd;
+
+        if (newbest) {
+          best_rd = rd;
+          *best_filter = mbmi->interp_filter;
+          if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
+            best_needs_copy = !best_needs_copy;
+        }
 
-      if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
-          (cm->mcomp_filter_type != SWITCHABLE &&
-           cm->mcomp_filter_type == mbmi->interp_filter)) {
-        pred_exists = 1;
+        if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
+            (cm->mcomp_filter_type != SWITCHABLE &&
+             cm->mcomp_filter_type == mbmi->interp_filter)) {
+          pred_exists = 1;
+        }
       }
-      interpolating_intpel_seen |= is_intpel_interp;
-    }
 
-    for (i = 0; i < MAX_MB_PLANE; i++) {
-      xd->plane[i].dst.buf = orig_dst[i];
-      xd->plane[i].dst.stride = orig_dst_stride[i];
+      for (i = 0; i < MAX_MB_PLANE; i++) {
+        xd->plane[i].dst.buf = orig_dst[i];
+        xd->plane[i].dst.stride = orig_dst_stride[i];
+      }
     }
   }
   // Set the appropriate filter
@@ -3486,66 +3484,76 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       xd->mode_info_context->mbmi.txfm_size = TX_4X4;
 
       cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX;
-      for (switchable_filter_index = 0;
-           switchable_filter_index < VP9_SWITCHABLE_FILTERS;
-           ++switchable_filter_index) {
-        int newbest, rs;
-        int64_t rs_rd;
-        mbmi->interp_filter = switchable_filter_index;
-        vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
-
-        tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
-                     &mbmi->ref_mvs[ref_frame][0],
-                     second_ref,
-                     best_yrd,
-                     &rate, &rate_y, &distortion,
-                     &skippable, &total_sse,
-                     (int)this_rd_thresh, seg_mvs,
-                     bsi, switchable_filter_index,
-                     mi_row, mi_col);
-        if (tmp_rd == INT64_MAX)
-          continue;
-
-        cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
-        rs = get_switchable_rate(x);
-        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
-        cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
-            MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd);
-        if (cm->mcomp_filter_type == SWITCHABLE)
-          tmp_rd += rs_rd;
-
-        newbest = (tmp_rd < tmp_best_rd);
-        if (newbest) {
-          tmp_best_filter = mbmi->interp_filter;
-          tmp_best_rd = tmp_rd;
-        }
-        if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
-            (mbmi->interp_filter == cm->mcomp_filter_type &&
-             cm->mcomp_filter_type != SWITCHABLE)) {
-          tmp_best_rdu = tmp_rd;
-          tmp_best_rate = rate;
-          tmp_best_ratey = rate_y;
-          tmp_best_distortion = distortion;
-          tmp_best_sse = total_sse;
-          tmp_best_skippable = skippable;
-          tmp_best_mbmode = *mbmi;
-          tmp_best_partition = *x->partition_info;
-          for (i = 0; i < 4; i++)
-            tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
-          pred_exists = 1;
-          if (switchable_filter_index == 0 &&
-              cpi->sf.use_rd_breakout &&
-              best_rd < INT64_MAX) {
-            if (tmp_best_rdu / 2 > best_rd) {
-              // skip searching the other filters if the first is
-              // already substantially larger than the best so far
+      if (cm->mcomp_filter_type != BILINEAR) {
+        tmp_best_filter = EIGHTTAP;
+        if (x->source_variance <
+            cpi->sf.disable_filter_search_var_thresh) {
+          tmp_best_filter = EIGHTTAP;
+          vp9_zero(cpi->rd_filter_cache);
+        } else {
+          for (switchable_filter_index = 0;
+               switchable_filter_index < VP9_SWITCHABLE_FILTERS;
+               ++switchable_filter_index) {
+            int newbest, rs;
+            int64_t rs_rd;
+            mbmi->interp_filter = switchable_filter_index;
+            vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
+
+            tmp_rd = rd_pick_best_mbsegmentation(cpi, x,
+                                                 &mbmi->ref_mvs[ref_frame][0],
+                                                 second_ref,
+                                                 best_yrd,
+                                                 &rate, &rate_y, &distortion,
+                                                 &skippable, &total_sse,
+                                                 (int)this_rd_thresh, seg_mvs,
+                                                 bsi, switchable_filter_index,
+                                                 mi_row, mi_col);
+
+            if (tmp_rd == INT64_MAX)
+              continue;
+            cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
+            rs = get_switchable_rate(x);
+            rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
+            cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] =
+                MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS],
+                    tmp_rd + rs_rd);
+            if (cm->mcomp_filter_type == SWITCHABLE)
+              tmp_rd += rs_rd;
+
+            newbest = (tmp_rd < tmp_best_rd);
+            if (newbest) {
               tmp_best_filter = mbmi->interp_filter;
-              tmp_best_rdu = INT64_MAX;
-              break;
+              tmp_best_rd = tmp_rd;
             }
-          }
+            if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
+                (mbmi->interp_filter == cm->mcomp_filter_type &&
+                 cm->mcomp_filter_type != SWITCHABLE)) {
+              tmp_best_rdu = tmp_rd;
+              tmp_best_rate = rate;
+              tmp_best_ratey = rate_y;
+              tmp_best_distortion = distortion;
+              tmp_best_sse = total_sse;
+              tmp_best_skippable = skippable;
+              tmp_best_mbmode = *mbmi;
+              tmp_best_partition = *x->partition_info;
+              for (i = 0; i < 4; i++)
+                tmp_best_bmodes[i] = xd->mode_info_context->bmi[i];
+              pred_exists = 1;
+              if (switchable_filter_index == 0 &&
+                  cpi->sf.use_rd_breakout &&
+                  best_rd < INT64_MAX) {
+                if (tmp_best_rdu / 2 > best_rd) {
+                  // skip searching the other filters if the first is
+                  // already substantially larger than the best so far
+                  tmp_best_filter = mbmi->interp_filter;
+                  tmp_best_rdu = INT64_MAX;
+                  break;
+                }
+              }
+            }
+          }  // switchable_filter_index loop
         }
-      }  // switchable_filter_index loop
+      }
 
       if (tmp_best_rdu == INT64_MAX)
         continue;