Merge "vp9_reconinter.h static functions in header converted to global"
[platform/upstream/libvpx.git] / vp9 / encoder / vp9_rdopt.c
index e2d2d5a..361bbd9 100644 (file)
@@ -30,6 +30,7 @@
 #include "vp9/common/vp9_seg_common.h"
 #include "vp9/common/vp9_systemdependent.h"
 
+#include "vp9/encoder/vp9_cost.h"
 #include "vp9/encoder/vp9_encodemb.h"
 #include "vp9/encoder/vp9_encodemv.h"
 #include "vp9/encoder/vp9_mcomp.h"
@@ -38,7 +39,6 @@
 #include "vp9/encoder/vp9_ratectrl.h"
 #include "vp9/encoder/vp9_rdopt.h"
 #include "vp9/encoder/vp9_tokenize.h"
-#include "vp9/encoder/vp9_treewriter.h"
 #include "vp9/encoder/vp9_variance.h"
 
 /* Factor to weigh the rate for switchable interp filters */
@@ -157,10 +157,10 @@ static void fill_mode_costs(VP9_COMP *cpi) {
 
   // TODO(rbultje) separate tables for superblock costing?
   vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
-  vp9_cost_tokens(x->intra_uv_mode_cost[1],
-                  fc->uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
-  vp9_cost_tokens(x->intra_uv_mode_cost[0],
-                  vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
+  vp9_cost_tokens(x->intra_uv_mode_cost[KEY_FRAME],
+                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
+  vp9_cost_tokens(x->intra_uv_mode_cost[INTER_FRAME],
+                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
 
   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
     vp9_cost_tokens((int *)x->switchable_interp_costs[i],
@@ -282,14 +282,12 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
   x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
   x->errorperbit += (x->errorperbit == 0);
 
-  vp9_set_speed_features(cpi);
-
   x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                          cm->frame_type != KEY_FRAME) ? 0 : 1;
 
   set_block_thresholds(cpi);
 
-  if (!cpi->sf.use_nonrd_pick_mode) {
+  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
     fill_token_costs(x->token_costs, cm->fc.coef_probs);
 
     for (i = 0; i < PARTITION_CONTEXTS; i++)
@@ -297,7 +295,8 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) {
                       vp9_partition_tree);
   }
 
-  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) {
+  if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
+      cm->frame_type == KEY_FRAME) {
     fill_mode_costs(cpi);
 
     if (!frame_is_intra_only(cm)) {
@@ -396,9 +395,9 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
 }
 
-static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
-                                     unsigned int qstep, int *rate,
-                                     int64_t *dist) {
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+                                  unsigned int qstep, int *rate,
+                                  int64_t *dist) {
   // This function models the rate and distortion for a Laplacian
   // source with given variance when quantized with a uniform quantizer
   // with given stepsize. The closed form expressions are in:
@@ -460,8 +459,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
     } else {
       int rate;
       int64_t dist;
-      model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
-                               pd->dequant[1] >> 3, &rate, &dist);
+      vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
+                                   pd->dequant[1] >> 3, &rate, &dist);
       rate_sum += rate;
       dist_sum += dist;
     }
@@ -508,7 +507,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
                          &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
                          &sse);
       // sse works better than var, since there is no dc prediction used
-      model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
+      vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
+                                   &rate, &dist);
       rate_sum += rate;
       dist_sum += dist;
       *out_skip &= (rate < 1024);
@@ -1037,7 +1037,7 @@ static int conditional_skipintra(MB_PREDICTION_MODE mode,
 
 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                      MB_PREDICTION_MODE *best_mode,
-                                     int *bmode_costs,
+                                     const int *bmode_costs,
                                      ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                      int *bestrate, int *bestratey,
                                      int64_t *bestdistortion,
@@ -1162,14 +1162,12 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
   return best_rd;
 }
 
-static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
-                                            MACROBLOCK * const mb,
-                                            int * const rate,
-                                            int * const rate_y,
-                                            int64_t * const distortion,
+static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
+                                            int *rate, int *rate_y,
+                                            int64_t *distortion,
                                             int64_t best_rd) {
   int i, j;
-  MACROBLOCKD *const xd = &mb->e_mbd;
+  const MACROBLOCKD *const xd = &mb->e_mbd;
   MODE_INFO *const mic = xd->mi_8x8[0];
   const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
   const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
@@ -1182,13 +1180,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
   int tot_rate_y = 0;
   int64_t total_rd = 0;
   ENTROPY_CONTEXT t_above[4], t_left[4];
-  int *bmode_costs;
+  const int *bmode_costs = mb->mbmode_cost;
 
   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 
-  bmode_costs = mb->mbmode_cost;
-
   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
@@ -1463,12 +1459,6 @@ static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
   }
 }
 
-void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
-                            const MV *mv) {
-  xd->mi_8x8[0]->mbmi.mode = mode;
-  xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv;
-}
-
 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 BLOCK_SIZE bsize,
                                 int_mv *frame_mv,
@@ -1476,59 +1466,56 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                 int_mv single_newmv[MAX_REF_FRAMES],
                                 int *rate_mv);
 
-static int labels2mode(MACROBLOCK *x, int i,
+static int labels2mode(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
                        MB_PREDICTION_MODE mode,
-                       int_mv *this_mv, int_mv *this_second_mv,
+                       int_mv this_mv[2],
                        int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                        int_mv seg_mvs[MAX_REF_FRAMES],
-                       int_mv *best_ref_mv,
-                       int_mv *second_best_ref_mv,
-                       int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
-  MACROBLOCKD *const xd = &x->e_mbd;
+                       int_mv *best_ref_mv[2],
+                       const int *mvjcost, int *mvcost[2]) {
   MODE_INFO *const mic = xd->mi_8x8[0];
-  MB_MODE_INFO *mbmi = &mic->mbmi;
+  const MB_MODE_INFO *const mbmi = &mic->mbmi;
   int thismvcost = 0;
   int idx, idy;
   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
-  const int has_second_rf = has_second_ref(mbmi);
+  const int is_compound = has_second_ref(mbmi);
 
   // the only time we should do costing for new motion vector or mode
   // is when we are on a new label  (jbb May 08, 2007)
   switch (mode) {
     case NEWMV:
-      this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
-      thismvcost += vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
+      this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
+      thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
-      if (has_second_rf) {
-        this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
-        thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
-                                      &second_best_ref_mv->as_mv,
+      if (is_compound) {
+        this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
+        thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
       }
       break;
     case NEARESTMV:
-      this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
-      if (has_second_rf)
-        this_second_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
+      this_mv[0].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
+      if (is_compound)
+        this_mv[1].as_int = frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
       break;
     case NEARMV:
-      this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
-      if (has_second_rf)
-        this_second_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
+      this_mv[0].as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
+      if (is_compound)
+        this_mv[1].as_int = frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
       break;
     case ZEROMV:
-      this_mv->as_int = 0;
-      if (has_second_rf)
-        this_second_mv->as_int = 0;
+      this_mv[0].as_int = 0;
+      if (is_compound)
+        this_mv[1].as_int = 0;
       break;
     default:
       break;
   }
 
-  mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
-  if (has_second_rf)
-    mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
+  mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
+  if (is_compound)
+    mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
 
   mic->bmi[i].as_mode = mode;
 
@@ -1704,6 +1691,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
   int mode_idx;
   int subpelmv = 1, have_ref = 0;
   const int has_second_rf = has_second_ref(mbmi);
+  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
 
   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
@@ -1719,7 +1707,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
-      int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
+      int_mv mode_mv[MB_MODE_COUNT][2];
       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
       MB_PREDICTION_MODE mode_selected = ZEROMV;
       int64_t best_rd = INT64_MAX;
@@ -1741,11 +1729,12 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
 
         mode_idx = INTER_OFFSET(this_mode);
         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
-        if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << mode_idx))
+        if (disable_inter_mode_mask & (1 << mode_idx))
           continue;
 
         // if we're near/nearest and mv == 0,0, compare to zeromv
-        if ((this_mode == NEARMV || this_mode == NEARESTMV ||
+        if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+            (this_mode == NEARMV || this_mode == NEARESTMV ||
              this_mode == ZEROMV) &&
             frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
             (!has_second_rf ||
@@ -1790,7 +1779,7 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
         // motion search for newmv (single predictor case only)
         if (!has_second_rf && this_mode == NEWMV &&
             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
-          int_mv *const new_mv = &mode_mv[NEWMV];
+          int_mv *const new_mv = &mode_mv[NEWMV][0];
           int step_param = 0;
           int further_steps;
           int thissme, bestsme = INT_MAX;
@@ -1848,18 +1837,30 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
                                      sadpb, 1, v_fn_ptr, 1,
                                      &bsi->ref_mv[0]->as_mv,
                                      &new_mv->as_mv);
+            if (bestsme < INT_MAX)
+              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+                                           &bsi->ref_mv[0]->as_mv,
+                                           v_fn_ptr, 1);
           } else if (cpi->sf.search_method == SQUARE) {
             bestsme = vp9_square_search(x, &mvp_full,
                                         step_param,
                                         sadpb, 1, v_fn_ptr, 1,
                                         &bsi->ref_mv[0]->as_mv,
                                         &new_mv->as_mv);
+            if (bestsme < INT_MAX)
+              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+                                           &bsi->ref_mv[0]->as_mv,
+                                           v_fn_ptr, 1);
           } else if (cpi->sf.search_method == BIGDIA) {
             bestsme = vp9_bigdia_search(x, &mvp_full,
                                         step_param,
                                         sadpb, 1, v_fn_ptr, 1,
                                         &bsi->ref_mv[0]->as_mv,
                                         &new_mv->as_mv);
+            if (bestsme < INT_MAX)
+              bestsme = vp9_get_mvpred_var(x, &new_mv->as_mv,
+                                           &bsi->ref_mv[0]->as_mv,
+                                           v_fn_ptr, 1);
           } else {
             bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                              sadpb, further_steps, 0, v_fn_ptr,
@@ -1938,55 +1939,43 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
         }
 
         bsi->rdstat[i][mode_idx].brate =
-            labels2mode(x, i, this_mode, &mode_mv[this_mode],
-                        &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
-                        bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost,
-                        x->mvcost, cpi);
-
-
-        bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
-        if (num_4x4_blocks_wide > 1)
-          bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
-              mode_mv[this_mode].as_int;
-        if (num_4x4_blocks_high > 1)
-          bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
-              mode_mv[this_mode].as_int;
-        if (has_second_rf) {
-          bsi->rdstat[i][mode_idx].mvs[1].as_int =
-              second_mode_mv[this_mode].as_int;
+            labels2mode(cpi, xd, i, this_mode, mode_mv[this_mode], frame_mv,
+                        seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost);
+
+        for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+          bsi->rdstat[i][mode_idx].mvs[ref].as_int =
+              mode_mv[this_mode][ref].as_int;
           if (num_4x4_blocks_wide > 1)
-            bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
-                second_mode_mv[this_mode].as_int;
+            bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
+                mode_mv[this_mode][ref].as_int;
           if (num_4x4_blocks_high > 1)
-            bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
-                second_mode_mv[this_mode].as_int;
+            bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
+                mode_mv[this_mode][ref].as_int;
         }
 
         // Trap vectors that reach beyond the UMV borders
-        if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) ||
+        if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
             (has_second_rf &&
-             mv_check_bounds(x, &second_mode_mv[this_mode].as_mv)))
+             mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
           continue;
 
         if (filter_idx > 0) {
           BEST_SEG_INFO *ref_bsi = bsi_buf;
-          subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv);
-          have_ref = mode_mv[this_mode].as_int ==
-                         ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
-          if (has_second_rf) {
-            subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv);
-            have_ref &= second_mode_mv[this_mode].as_int ==
-                            ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
+          subpelmv = 0;
+          have_ref = 1;
+
+          for (ref = 0; ref < 1 + has_second_rf; ++ref) {
+            subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
+            have_ref &= mode_mv[this_mode][ref].as_int ==
+                ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
           }
 
           if (filter_idx > 1 && !subpelmv && !have_ref) {
             ref_bsi = bsi_buf + 1;
-            have_ref = mode_mv[this_mode].as_int ==
-                       ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
-            if (has_second_rf) {
-              have_ref  &= second_mode_mv[this_mode].as_int ==
-                           ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
-            }
+            have_ref = 1;
+            for (ref = 0; ref < 1 + has_second_rf; ++ref)
+              have_ref &= mode_mv[this_mode][ref].as_int ==
+                  ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
           }
 
           if (!subpelmv && have_ref &&
@@ -2047,10 +2036,9 @@ static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
       vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
       vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
 
-      labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
-                  &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
-                  bsi->ref_mv[0], bsi->ref_mv[1], x->nmvjointcost,
-                  x->mvcost, cpi);
+      labels2mode(cpi, xd, i, mode_selected, mode_mv[mode_selected],
+                  frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
+                  x->mvcost);
 
       br += bsi->rdstat[i][mode_idx].brate;
       bd += bsi->rdstat[i][mode_idx].bdist;
@@ -2470,21 +2458,33 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
   further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
   if (cpi->sf.search_method == FAST_HEX) {
-    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb,
+    bestsme = vp9_fast_hex_search(x, &mvp_full, step_param, sadpb, 0,
                                   &cpi->fn_ptr[bsize], 1,
                                   &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
   } else if (cpi->sf.search_method == HEX) {
     bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
                              &cpi->fn_ptr[bsize], 1,
                              &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
   } else if (cpi->sf.search_method == SQUARE) {
     bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
                                 &cpi->fn_ptr[bsize], 1,
                                 &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
   } else if (cpi->sf.search_method == BIGDIA) {
     bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
                                 &cpi->fn_ptr[bsize], 1,
                                 &ref_mv, &tmp_mv->as_mv);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_var(x, &tmp_mv->as_mv, &ref_mv,
+                                   &cpi->fn_ptr[bsize], 1);
   } else {
     bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
                                      sadpb, further_steps, 1,
@@ -2611,6 +2611,9 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
                                        x->nmvjointcost, x->mvcost,
                                        &ref_mv[id].as_mv, second_pred,
                                        pw, ph);
+    if (bestsme < INT_MAX)
+      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv.as_mv, &ref_mv[id].as_mv,
+                                      second_pred, &cpi->fn_ptr[bsize], 1);
 
     x->mv_col_min = tmp_col_min;
     x->mv_col_max = tmp_col_max;
@@ -3158,6 +3161,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
   const int intra_y_mode_mask =
       cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
+  const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
 
   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
 
@@ -3256,6 +3260,10 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     mode_skip_mask |= new_modes_mask;
   }
 
+  if (bsize > cpi->sf.max_intra_bsize) {
+    mode_skip_mask |= 0xFF30808;
+  }
+
   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
     int mode_excluded = 0;
     int64_t this_rd = INT64_MAX;
@@ -3302,7 +3310,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     this_mode = vp9_mode_order[mode_index].mode;
     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
     if (ref_frame != INTRA_FRAME &&
-        cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode)))
+        disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
       continue;
     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
 
@@ -3346,7 +3354,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
       }
     } else {
       // if we're near/nearest and mv == 0,0, compare to zeromv
-      if ((this_mode == NEARMV || this_mode == NEARESTMV ||
+      if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
+          (this_mode == NEARMV || this_mode == NEARESTMV ||
           this_mode == ZEROMV) &&
           frame_mv[this_mode][ref_frame].as_int == 0 &&
           !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&