src/third_party/libvpx/source/libvpx/vp9/encoder/vp9_rdopt.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13
  14 #include "./vp9_rtcd.h"
  15
  16 #include "vpx_mem/vpx_mem.h"
  17
  18 #include "vp9/common/vp9_common.h"
  19 #include "vp9/common/vp9_entropy.h"
  20 #include "vp9/common/vp9_entropymode.h"
  21 #include "vp9/common/vp9_idct.h"
  22 #include "vp9/common/vp9_mvref_common.h"
  23 #include "vp9/common/vp9_pred_common.h"
  24 #include "vp9/common/vp9_quant_common.h"
  25 #include "vp9/common/vp9_reconinter.h"
  26 #include "vp9/common/vp9_reconintra.h"
  27 #include "vp9/common/vp9_seg_common.h"
  28 #include "vp9/common/vp9_systemdependent.h"
  29
  30 #include "vp9/encoder/vp9_cost.h"
  31 #include "vp9/encoder/vp9_encodemb.h"
  32 #include "vp9/encoder/vp9_encodemv.h"
  33 #include "vp9/encoder/vp9_encoder.h"
  34 #include "vp9/encoder/vp9_mcomp.h"
  35 #include "vp9/encoder/vp9_quantize.h"
  36 #include "vp9/encoder/vp9_ratectrl.h"
  37 #include "vp9/encoder/vp9_rd.h"
  38 #include "vp9/encoder/vp9_rdopt.h"
  39 #include "vp9/encoder/vp9_variance.h"
  40
// NOTE(review): neither RD_THRESH_* constant is referenced in this part of
// the file; presumably they bound and step an adaptive RD threshold factor --
// confirm against their users.
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC      1

// Reference-frame bit masks: bit n corresponds to the reference frame whose
// enum value is n. Each *_MODE_MASK sets the bits of the three reference
// frame types OTHER than the one the macro is named after.
#define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
                                 (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
                                 (1 << INTRA_FRAME))

// ALTREF_FRAME bit plus bit 0.
// NOTE(review): bit 0 presumably stands for INTRA_FRAME -- confirm its value.
#define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)

// NOTE(review): not referenced in this part of the file; presumably the
// lowest mode index at which early termination may trigger -- confirm.
#define MIN_EARLY_TERM_INDEX    3
  54
// One candidate of the mode table: a prediction mode together with the
// reference frame pair it applies to (see vp9_mode_order below).
typedef struct {
  PREDICTION_MODE mode;
  // ref_frame[1] is NONE in every single-reference entry of vp9_mode_order.
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
  59
// One candidate of the reference table: a reference frame pair without an
// associated prediction mode (see vp9_ref_order below).
typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;
  63
// Working state threaded through the per-transform-block RD callback
// (block_rd_txfm) and its helpers rate_block() and dist_block().
struct rdcost_block_args {
  MACROBLOCK *x;
  // Above/left entropy contexts; filled by vp9_get_entropy_contexts() in
  // txfm_rd_in_plane() and updated by cost_coeffs() as blocks are costed.
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  // Values of the transform block processed most recently.
  int rate;
  int64_t dist;
  int64_t sse;
  // Running totals over all transform blocks visited so far.
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  // RD budget: once this_rd exceeds best_rd, skip is set and the remaining
  // blocks are not evaluated.
  int64_t best_rd;
  int skip;
  // Forwarded unchanged to cost_coeffs().
  int use_fast_coef_costing;
  // Scan order (scan and neighbors tables) used for coefficient costing.
  const scan_order *so;
};
  80
// Table of MAX_MODES candidates, each a prediction mode plus a reference
// frame pair. Entries whose second reference is NONE use a single reference
// (or INTRA_FRAME); entries whose second reference is ALTREF_FRAME use two.
// NOTE(review): presumably the mode search walks this table in index order,
// so the ordering here is significant -- confirm against the users of
// vp9_mode_order, which are outside this part of the file.
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  {NEARESTMV, {LAST_FRAME,   NONE}},
  {NEARESTMV, {ALTREF_FRAME, NONE}},
  {NEARESTMV, {GOLDEN_FRAME, NONE}},

  {DC_PRED,   {INTRA_FRAME,  NONE}},

  {NEWMV,     {LAST_FRAME,   NONE}},
  {NEWMV,     {ALTREF_FRAME, NONE}},
  {NEWMV,     {GOLDEN_FRAME, NONE}},

  {NEARMV,    {LAST_FRAME,   NONE}},
  {NEARMV,    {ALTREF_FRAME, NONE}},
  {NEARMV,    {GOLDEN_FRAME, NONE}},

  {ZEROMV,    {LAST_FRAME,   NONE}},
  {ZEROMV,    {GOLDEN_FRAME, NONE}},
  {ZEROMV,    {ALTREF_FRAME, NONE}},

  {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},

  {TM_PRED,   {INTRA_FRAME,  NONE}},

  {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
  {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
  {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},

  {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
  {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},

  {H_PRED,    {INTRA_FRAME,  NONE}},
  {V_PRED,    {INTRA_FRAME,  NONE}},
  {D135_PRED, {INTRA_FRAME,  NONE}},
  {D207_PRED, {INTRA_FRAME,  NONE}},
  {D153_PRED, {INTRA_FRAME,  NONE}},
  {D63_PRED,  {INTRA_FRAME,  NONE}},
  {D117_PRED, {INTRA_FRAME,  NONE}},
  {D45_PRED,  {INTRA_FRAME,  NONE}},
};
 122
// Table of MAX_REFS reference frame pairs: the three single references, the
// two pairs that add ALTREF_FRAME as second reference, and finally intra.
// NOTE(review): presumably walked in index order by a reference-only search
// (no prediction mode attached) -- confirm against the users of this table.
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  {{LAST_FRAME,   NONE}},
  {{GOLDEN_FRAME, NONE}},
  {{ALTREF_FRAME, NONE}},
  {{LAST_FRAME,   ALTREF_FRAME}},
  {{GOLDEN_FRAME, ALTREF_FRAME}},
  {{INTRA_FRAME,  NONE}},
};
 131
 132 static int raster_block_offset(BLOCK_SIZE plane_bsize,
 133                                int raster_block, int stride) {
 134   const int bw = b_width_log2(plane_bsize);
 135   const int y = 4 * (raster_block >> bw);
 136   const int x = 4 * (raster_block & ((1 << bw) - 1));
 137   return y * stride + x;
 138 }
 139 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 140                                           int raster_block, int16_t *base) {
 141   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 142   return base + raster_block_offset(plane_bsize, raster_block, stride);
 143 }
 144
 145 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
 146                            int m, int n, int min_plane, int max_plane) {
 147   int i;
 148
 149   for (i = min_plane; i < max_plane; ++i) {
 150     struct macroblock_plane *const p = &x->plane[i];
 151     struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
 152
 153     p->coeff    = ctx->coeff_pbuf[i][m];
 154     p->qcoeff   = ctx->qcoeff_pbuf[i][m];
 155     pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
 156     p->eobs     = ctx->eobs_pbuf[i][m];
 157
 158     ctx->coeff_pbuf[i][m]   = ctx->coeff_pbuf[i][n];
 159     ctx->qcoeff_pbuf[i][m]  = ctx->qcoeff_pbuf[i][n];
 160     ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
 161     ctx->eobs_pbuf[i][m]    = ctx->eobs_pbuf[i][n];
 162
 163     ctx->coeff_pbuf[i][n]   = p->coeff;
 164     ctx->qcoeff_pbuf[i][n]  = p->qcoeff;
 165     ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
 166     ctx->eobs_pbuf[i][n]    = p->eobs;
 167   }
 168 }
 169
 170 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
 171                             MACROBLOCK *x, MACROBLOCKD *xd,
 172                             int *out_rate_sum, int64_t *out_dist_sum) {
 173   // Note our transform coeffs are 8 times an orthogonal transform.
 174   // Hence quantizer step is also 8 times. To get effective quantizer
 175   // we need to divide by 8 before sending to modeling function.
 176   int i;
 177   int64_t rate_sum = 0;
 178   int64_t dist_sum = 0;
 179   const int ref = xd->mi[0].src_mi->mbmi.ref_frame[0];
 180   unsigned int sse;
 181   unsigned int var = 0;
 182   unsigned int sum_sse = 0;
 183   const int shift = 8;
 184   int rate;
 185   int64_t dist;
 186
 187   x->pred_sse[ref] = 0;
 188
 189   for (i = 0; i < MAX_MB_PLANE; ++i) {
 190     struct macroblock_plane *const p = &x->plane[i];
 191     struct macroblockd_plane *const pd = &xd->plane[i];
 192     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
 193     const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 194     const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
 195     int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
 196     int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
 197     int idx, idy;
 198     int lw = b_width_log2_lookup[unit_size] + 2;
 199     int lh = b_height_log2_lookup[unit_size] + 2;
 200
 201     sum_sse = 0;
 202
 203     for (idy = 0; idy < bh; ++idy) {
 204       for (idx = 0; idx < bw; ++idx) {
 205         uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
 206         uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
 207         int block_idx = (idy << 1) + idx;
 208
 209         var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
 210                                         dst, pd->dst.stride, &sse);
 211         x->bsse[(i << 2) + block_idx] = sse;
 212         sum_sse += sse;
 213
 214         if (!x->select_tx_size) {
 215           if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
 216             x->skip_txfm[(i << 2) + block_idx] = 1;
 217           else if (var < p->quant_thred[1] >> shift)
 218             x->skip_txfm[(i << 2) + block_idx] = 2;
 219           else
 220             x->skip_txfm[(i << 2) + block_idx] = 0;
 221         }
 222
 223         if (i == 0)
 224           x->pred_sse[ref] += sse;
 225       }
 226     }
 227
 228     // Fast approximate the modelling function.
 229     if (cpi->oxcf.speed > 4) {
 230       int64_t rate;
 231       int64_t dist;
 232       int64_t square_error = sse;
 233       int quantizer = (pd->dequant[1] >> 3);
 234
 235       if (quantizer < 120)
 236         rate = (square_error * (280 - quantizer)) >> 8;
 237       else
 238         rate = 0;
 239       dist = (square_error * quantizer) >> 8;
 240       rate_sum += rate;
 241       dist_sum += dist;
 242     } else {
 243       vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
 244                                    pd->dequant[1] >> 3, &rate, &dist);
 245       rate_sum += rate;
 246       dist_sum += dist;
 247     }
 248   }
 249
 250   *out_rate_sum = (int)rate_sum;
 251   *out_dist_sum = dist_sum << 4;
 252 }
 253
 254 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
 255                           intptr_t block_size, int64_t *ssz) {
 256   int i;
 257   int64_t error = 0, sqcoeff = 0;
 258
 259   for (i = 0; i < block_size; i++) {
 260     const int diff = coeff[i] - dqcoeff[i];
 261     error +=  diff * diff;
 262     sqcoeff += coeff[i] * coeff[i];
 263   }
 264
 265   *ssz = sqcoeff;
 266   return error;
 267 }
 268
/* Number of coefficient positions in each successive band, per transform
 * size. The non-zero entries of a row add up to the number of coefficients
 * of that transform (16, 64, 256, 1024). cost_coeffs() starts reading at
 * entry [1] because it costs the first (DC) coefficient separately.
 *
 * The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
 * was non-zero). Rows are declared with 8 slots but list 7 values; the
 * remaining slot is implicitly zero. */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4,  3,   16 - 13, 0 },
  { 1, 2, 3, 4, 11,   64 - 21, 0 },
  { 1, 2, 3, 4, 11,  256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
// Returns the token cost of the quantized coefficients of one transform block.
//
//   x, plane, block  select the coefficient buffer (p->qcoeff) and its eob.
//   A, L             above/left entropy contexts of the block; read to form
//                    the initial context and both overwritten on return with
//                    1 if at least one coefficient was costed, else 0.
//   tx_size          transform size; selects the band_counts row and the
//                    token cost table.
//   scan, nb         scan order and neighbor tables passed to
//                    get_coef_context().
//   use_fast_coef_costing
//                    when non-zero, contexts of the AC and EOB tokens are
//                    derived from the previous token only (!prev_t) instead
//                    of calling get_coef_context().
static INLINE int cost_coeffs(MACROBLOCK *x,
                              int plane, int block,
                              ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
                              TX_SIZE tx_size,
                              const int16_t *scan, const int16_t *nb,
                              int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
  const struct macroblock_plane *p = &x->plane[plane];
  const struct macroblockd_plane *pd = &xd->plane[plane];
  const PLANE_TYPE type = pd->plane_type;
  // Start at entry [1]: the first band holds only the DC coefficient, which
  // is costed separately below.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  // Points at the cost table of the current band; advanced whenever a band
  // is exhausted.
  unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
                   x->token_costs[tx_size][type][is_inter_block(mbmi)];
  uint8_t token_cache[32 * 32];
  int pt = combine_entropy_contexts(*A, *L);
  int c, cost;
  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
                              : get_uv_tx_size(mbmi, pd) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    int band_left = *band_count++;

    // dc token
    int v = qcoeff[0];
    int prev_t = vp9_dct_value_tokens_ptr[v].token;
    cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
    token_cache[0] = vp9_pt_energy_class[prev_t];
    ++token_costs;

    // ac tokens
    for (c = 1; c < eob; c++) {
      const int rc = scan[c];
      int t;

      v = qcoeff[rc];
      t = vp9_dct_value_tokens_ptr[v].token;
      if (use_fast_coef_costing) {
        // Fast path: !prev_t (1 when the previous token value is 0) stands in
        // for the neighbor-derived context, and token_cache is not updated.
        cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
        token_cache[rc] = vp9_pt_energy_class[t];
      }
      prev_t = t;
      // Move to the next band once the current one is used up. After the
      // very last coefficient of the block this loads the 0 terminator.
      if (!--band_left) {
        band_left = *band_count++;
        ++token_costs;
      }
    }

    // eob token
    // Skipped when band_left is 0, i.e. the terminator was reached because
    // the last coefficient position of the block was coded.
    if (band_left) {
      if (use_fast_coef_costing) {
        cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
      } else {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  // is eob first coefficient;
  *A = *L = (c > 0);

  return cost;
}
 354 static void dist_block(int plane, int block, TX_SIZE tx_size,
 355                        struct rdcost_block_args* args) {
 356   const int ss_txfrm_size = tx_size << 1;
 357   MACROBLOCK* const x = args->x;
 358   MACROBLOCKD* const xd = &x->e_mbd;
 359   const struct macroblock_plane *const p = &x->plane[plane];
 360   const struct macroblockd_plane *const pd = &xd->plane[plane];
 361   int64_t this_sse;
 362   int shift = tx_size == TX_32X32 ? 0 : 2;
 363   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 364   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 365   args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
 366                                &this_sse) >> shift;
 367   args->sse  = this_sse >> shift;
 368
 369   if (x->skip_encode && !is_inter_block(&xd->mi[0].src_mi->mbmi)) {
 370     // TODO(jingning): tune the model to better capture the distortion.
 371     int64_t p = (pd->dequant[1] * pd->dequant[1] *
 372                     (1 << ss_txfrm_size)) >> (shift + 2);
 373     args->dist += (p >> 4);
 374     args->sse  += p;
 375   }
 376 }
 377
 378 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
 379                        TX_SIZE tx_size, struct rdcost_block_args* args) {
 380   int x_idx, y_idx;
 381   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
 382
 383   args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
 384                            args->t_left + y_idx, tx_size,
 385                            args->so->scan, args->so->neighbors,
 386                            args->use_fast_coef_costing);
 387 }
 388
 389 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
 390                           TX_SIZE tx_size, void *arg) {
 391   struct rdcost_block_args *args = arg;
 392   MACROBLOCK *const x = args->x;
 393   MACROBLOCKD *const xd = &x->e_mbd;
 394   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 395   int64_t rd1, rd2, rd;
 396
 397   if (args->skip)
 398     return;
 399
 400   if (!is_inter_block(mbmi)) {
 401     vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
 402     dist_block(plane, block, tx_size, args);
 403   } else if (max_txsize_lookup[plane_bsize] == tx_size) {
 404     if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
 405       // full forward transform and quantization
 406       vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 407       dist_block(plane, block, tx_size, args);
 408     } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
 409       // compute DC coefficient
 410       tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
 411       tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
 412       vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
 413       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
 414       args->dist = args->sse;
 415       if (!x->plane[plane].eobs[block])
 416         args->dist = args->sse - ((coeff[0] * coeff[0] -
 417             (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
 418     } else {
 419       // skip forward transform
 420       x->plane[plane].eobs[block] = 0;
 421       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
 422       args->dist = args->sse;
 423     }
 424   } else {
 425     // full forward transform and quantization
 426     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 427     dist_block(plane, block, tx_size, args);
 428   }
 429
 430   rate_block(plane, block, plane_bsize, tx_size, args);
 431   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
 432   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
 433
 434   // TODO(jingning): temporarily enabled only for luma component
 435   rd = MIN(rd1, rd2);
 436   if (plane == 0)
 437     x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
 438                                     (rd1 > rd2 && !xd->lossless);
 439
 440   args->this_rate += args->rate;
 441   args->this_dist += args->dist;
 442   args->this_sse  += args->sse;
 443   args->this_rd += rd;
 444
 445   if (args->this_rd > args->best_rd) {
 446     args->skip = 1;
 447     return;
 448   }
 449 }
 450
 451 static void txfm_rd_in_plane(MACROBLOCK *x,
 452                              int *rate, int64_t *distortion,
 453                              int *skippable, int64_t *sse,
 454                              int64_t ref_best_rd, int plane,
 455                              BLOCK_SIZE bsize, TX_SIZE tx_size,
 456                              int use_fast_coef_casting) {
 457   MACROBLOCKD *const xd = &x->e_mbd;
 458   const struct macroblockd_plane *const pd = &xd->plane[plane];
 459   struct rdcost_block_args args;
 460   vp9_zero(args);
 461   args.x = x;
 462   args.best_rd = ref_best_rd;
 463   args.use_fast_coef_costing = use_fast_coef_casting;
 464
 465   if (plane == 0)
 466     xd->mi[0].src_mi->mbmi.tx_size = tx_size;
 467
 468   vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 469
 470   args.so = get_scan(xd, tx_size, pd->plane_type, 0);
 471
 472   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
 473                                          block_rd_txfm, &args);
 474   if (args.skip) {
 475     *rate       = INT_MAX;
 476     *distortion = INT64_MAX;
 477     *sse        = INT64_MAX;
 478     *skippable  = 0;
 479   } else {
 480     *distortion = args.this_dist;
 481     *rate       = args.this_rate;
 482     *sse        = args.this_sse;
 483     *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
 484   }
 485 }
 486
 487 static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
 488                                    int *rate, int64_t *distortion,
 489                                    int *skip, int64_t *sse,
 490                                    int64_t ref_best_rd,
 491                                    BLOCK_SIZE bs) {
 492   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 493   VP9_COMMON *const cm = &cpi->common;
 494   const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 495   MACROBLOCKD *const xd = &x->e_mbd;
 496   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 497
 498   mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
 499
 500   txfm_rd_in_plane(x, rate, distortion, skip,
 501                    sse, ref_best_rd, 0, bs,
 502                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 503 }
 504
// Evaluates the luma plane with every transform size from the block's
// maximum down to index 0 and selects a size by RD cost.
//
//   rate, distortion, skip, psse  receive the results for the size finally
//                                 stored in mbmi->tx_size.
//   tx_cache                      receives, per TX_MODE, the RD cost that
//                                 mode would give for this block.
//   ref_best_rd                   RD budget handed to txfm_rd_in_plane().
//
// Index [0] of r[][]/rd[][] excludes the cost of signalling the transform
// size, index [1] includes it.
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
                                   int *rate,
                                   int64_t *distortion,
                                   int *skip,
                                   int64_t *psse,
                                   int64_t tx_cache[TX_MODES],
                                   int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX},
                             {INT64_MAX, INT64_MAX}};
  int n, m;
  int s0, s1;
  const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx = max_tx_size;

  const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
  assert(skip_prob > 0);
  // Cost of coding the skip flag as 0 / as 1.
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  for (n = max_tx_size; n >= 0;  n--) {
    txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
                     &sse[n], ref_best_rd, 0, bs, n,
                     cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      // Add the transform size signalling cost: a "one" for every index
      // below n, closed by a "zero" at index n -- except that the largest
      // size needs no closing "zero".
      for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
        if (m == n)
          r[n][1] += vp9_cost_zero(tx_probs[m]);
        else
          r[n][1] += vp9_cost_one(tx_probs[m]);
      }
    }
    if (d[n] == INT64_MAX) {
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      // Skippable: only the skip flag is paid for.
      rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    // Early termination in transform size search.
    // Stops when this size is over budget, is worse than the next larger
    // size, or is skippable. Note the break happens before best_tx is
    // updated, and entries of r/d/s/sse for smaller sizes stay unwritten.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
        (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
        s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  // Only TX_MODE_SELECT may use the searched size; every other mode is
  // forced to the largest size it allows for this block.
  mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
                      best_tx : MIN(max_tx_size, max_mode_tx_size);


  *distortion = d[mbmi->tx_size];
  *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip       = s[mbmi->tx_size];
  *psse       = sse[mbmi->tx_size];

  // Fixed-size modes: RD cost without size signalling at the largest size
  // the mode permits for this block.
  tx_cache[ONLY_4X4] = rd[TX_4X4][0];
  tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
  tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
  tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];

  // TX_MODE_SELECT: RD cost including size signalling.
  if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
    tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
  } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
    tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
  } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
    tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
  } else {
    tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
  }
}
 593
 594 static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 595                             int64_t *distortion, int *skip,
 596                             int64_t *psse, BLOCK_SIZE bs,
 597                             int64_t txfm_cache[TX_MODES],
 598                             int64_t ref_best_rd) {
 599   MACROBLOCKD *xd = &x->e_mbd;
 600   int64_t sse;
 601   int64_t *ret_sse = psse ? psse : &sse;
 602
 603   assert(bs == xd->mi[0].src_mi->mbmi.sb_type);
 604
 605   if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
 606     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
 607     choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
 608                            bs);
 609   } else {
 610     choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
 611                            txfm_cache, ref_best_rd, bs);
 612   }
 613 }
 614
 615 static int conditional_skipintra(PREDICTION_MODE mode,
 616                                  PREDICTION_MODE best_intra_mode) {
 617   if (mode == D117_PRED &&
 618       best_intra_mode != V_PRED &&
 619       best_intra_mode != D135_PRED)
 620     return 1;
 621   if (mode == D63_PRED &&
 622       best_intra_mode != V_PRED &&
 623       best_intra_mode != D45_PRED)
 624     return 1;
 625   if (mode == D207_PRED &&
 626       best_intra_mode != H_PRED &&
 627       best_intra_mode != D45_PRED)
 628     return 1;
 629   if (mode == D153_PRED &&
 630       best_intra_mode != H_PRED &&
 631       best_intra_mode != D135_PRED)
 632     return 1;
 633   return 0;
 634 }
 635
// Picks the best intra prediction mode for one sub-block of an 8x8 block.
//
//   ib              raster index (0..3) of the sub-block's first 4x4 block.
//   best_mode       in: best mode so far (used by the direction-mismatch
//                   skip); out: the selected mode, if any mode won.
//   bmode_costs     rate of signalling each mode.
//   a, l            above/left entropy contexts; updated to the contexts
//                   produced by the winning mode.
//   bestrate, bestratey, bestdistortion
//                   written only when some mode beats rd_thresh.
//   bsize           sub-block size; determines how many 4x4 blocks are coded.
//   rd_thresh       RD cost a mode has to beat.
//
// Returns the best RD cost found, or rd_thresh unchanged if no mode beat it.
// Unless x->skip_encode is set, the reconstruction of the winning mode is
// left in the destination buffer.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
                                     PREDICTION_MODE *best_mode,
                                     const int *bmode_costs,
                                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                     int *bestrate, int *bestratey,
                                     int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;

  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
                                                            src_stride)];
  uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
                                                       dst_stride)];
  // ta/tl: contexts on entry; tempa/templ: per-mode working copies.
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];

  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  // Reconstruction of the best mode so far, stored with a stride of 8.
  uint8_t best_dst[8 * 8];

  assert(ib < 4);

  vpx_memcpy(ta, a, sizeof(ta));
  vpx_memcpy(tl, l, sizeof(tl));
  xd->mi[0].src_mi->mbmi.tx_size = TX_4X4;

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    // Mode disabled by the speed features for 4x4 transforms.
    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
      continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode))
          continue;
    }

    // Every mode starts from the contexts the caller passed in.
    vpx_memcpy(tempa, ta, sizeof(ta));
    vpx_memcpy(templ, tl, sizeof(tl));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = ib + idy * 2 + idx;
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
                                                            p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0].src_mi->bmi[block].as_mode = mode;
        // With skip_encode the source frame is used as prediction reference
        // in place of the reconstruction.
        vp9_predict_intra_block(xd, block, 1,
                                TX_4X4, mode,
                                x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride,
                                dst, dst_stride, idx, idy, 0);
        vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          // Lossless path: Walsh-Hadamard transform. distortion is not
          // accumulated in this branch and therefore stays 0.
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                               so->scan, so->neighbors,
                               cpi->sf.use_fast_coef_costing);
          // Abandon the mode as soon as the coefficient cost alone reaches
          // the best RD cost so far.
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          // Reconstruct in place so later 4x4 blocks predict from it.
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
                             so->scan, so->neighbors,
                             cpi->sf.use_fast_coef_costing);
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >> 2;
          // Abandon the mode once its partial RD cost (without the mode
          // signalling rate) reaches the best RD cost so far.
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          // Reconstruct in place so later 4x4 blocks predict from it.
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                         dst, dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      vpx_memcpy(a, tempa, sizeof(tempa));
      vpx_memcpy(l, templ, sizeof(templ));
      // Save the reconstruction; later modes overwrite the destination.
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
                   num_4x4_blocks_wide * 4);
    }
  next:
    {}
  }

  // Nothing to restore if no mode won or if reconstruction is not in use.
  if (best_rd >= rd_thresh || x->skip_encode)
    return best_rd;

  // Put the winning mode's reconstruction back into the destination buffer.
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
               num_4x4_blocks_wide * 4);

  return best_rd;
}
 762
 763 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
 764                                             int *rate, int *rate_y,
 765                                             int64_t *distortion,
 766                                             int64_t best_rd) {
 767   int i, j;
 768   const MACROBLOCKD *const xd = &mb->e_mbd;
 769   MODE_INFO *const mic = xd->mi[0].src_mi;
 770   const MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
 771   const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
 772   const BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type;
 773   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
 774   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
 775   int idx, idy;
 776   int cost = 0;
 777   int64_t total_distortion = 0;
 778   int tot_rate_y = 0;
 779   int64_t total_rd = 0;
 780   ENTROPY_CONTEXT t_above[4], t_left[4];
 781   const int *bmode_costs = cpi->mbmode_cost;
 782
 783   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
 784   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 785
 786   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
 787   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
 788     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
 789       PREDICTION_MODE best_mode = DC_PRED;
 790       int r = INT_MAX, ry = INT_MAX;
 791       int64_t d = INT64_MAX, this_rd = INT64_MAX;
 792       i = idy * 2 + idx;
 793       if (cpi->common.frame_type == KEY_FRAME) {
 794         const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
 795         const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
 796
 797         bmode_costs  = cpi->y_mode_costs[A][L];
 798       }
 799
 800       this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
 801                                       t_above + idx, t_left + idy, &r, &ry, &d,
 802                                       bsize, best_rd - total_rd);
 803       if (this_rd >= best_rd - total_rd)
 804         return INT64_MAX;
 805
 806       total_rd += this_rd;
 807       cost += r;
 808       total_distortion += d;
 809       tot_rate_y += ry;
 810
 811       mic->bmi[i].as_mode = best_mode;
 812       for (j = 1; j < num_4x4_blocks_high; ++j)
 813         mic->bmi[i + j * 2].as_mode = best_mode;
 814       for (j = 1; j < num_4x4_blocks_wide; ++j)
 815         mic->bmi[i + j].as_mode = best_mode;
 816
 817       if (total_rd >= best_rd)
 818         return INT64_MAX;
 819     }
 820   }
 821
 822   *rate = cost;
 823   *rate_y = tot_rate_y;
 824   *distortion = total_distortion;
 825   mic->mbmi.mode = mic->bmi[3].as_mode;
 826
 827   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
 828 }
 829
 830 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
 831                                       int *rate, int *rate_tokenonly,
 832                                       int64_t *distortion, int *skippable,
 833                                       BLOCK_SIZE bsize,
 834                                       int64_t tx_cache[TX_MODES],
 835                                       int64_t best_rd) {
 836   PREDICTION_MODE mode;
 837   PREDICTION_MODE mode_selected = DC_PRED;
 838   MACROBLOCKD *const xd = &x->e_mbd;
 839   MODE_INFO *const mic = xd->mi[0].src_mi;
 840   int this_rate, this_rate_tokenonly, s;
 841   int64_t this_distortion, this_rd;
 842   TX_SIZE best_tx = TX_4X4;
 843   int i;
 844   int *bmode_costs = cpi->mbmode_cost;
 845
 846   if (cpi->sf.tx_size_search_method == USE_FULL_RD)
 847     for (i = 0; i < TX_MODES; i++)
 848       tx_cache[i] = INT64_MAX;
 849
 850   /* Y Search for intra prediction mode */
 851   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
 852     int64_t local_tx_cache[TX_MODES];
 853     MODE_INFO *above_mi = xd->mi[-xd->mi_stride].src_mi;
 854     MODE_INFO *left_mi = xd->left_available ? xd->mi[-1].src_mi : NULL;
 855
 856     if (cpi->common.frame_type == KEY_FRAME) {
 857       const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
 858       const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
 859
 860       bmode_costs = cpi->y_mode_costs[A][L];
 861     }
 862     mic->mbmi.mode = mode;
 863
 864     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
 865         &s, NULL, bsize, local_tx_cache, best_rd);
 866
 867     if (this_rate_tokenonly == INT_MAX)
 868       continue;
 869
 870     this_rate = this_rate_tokenonly + bmode_costs[mode];
 871     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 872
 873     if (this_rd < best_rd) {
 874       mode_selected   = mode;
 875       best_rd         = this_rd;
 876       best_tx         = mic->mbmi.tx_size;
 877       *rate           = this_rate;
 878       *rate_tokenonly = this_rate_tokenonly;
 879       *distortion     = this_distortion;
 880       *skippable      = s;
 881     }
 882
 883     if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
 884       for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
 885         const int64_t adj_rd = this_rd + local_tx_cache[i] -
 886             local_tx_cache[cpi->common.tx_mode];
 887         if (adj_rd < tx_cache[i]) {
 888           tx_cache[i] = adj_rd;
 889         }
 890       }
 891     }
 892   }
 893
 894   mic->mbmi.mode = mode_selected;
 895   mic->mbmi.tx_size = best_tx;
 896
 897   return best_rd;
 898 }
 899
 900 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
 901                              int *rate, int64_t *distortion, int *skippable,
 902                              int64_t *sse, BLOCK_SIZE bsize,
 903                              int64_t ref_best_rd) {
 904   MACROBLOCKD *const xd = &x->e_mbd;
 905   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
 906   const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
 907   int plane;
 908   int pnrate = 0, pnskip = 1;
 909   int64_t pndist = 0, pnsse = 0;
 910
 911   if (ref_best_rd < 0)
 912     goto term;
 913
 914   if (is_inter_block(mbmi)) {
 915     int plane;
 916     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
 917       vp9_subtract_plane(x, bsize, plane);
 918   }
 919
 920   *rate = 0;
 921   *distortion = 0;
 922   *sse = 0;
 923   *skippable = 1;
 924
 925   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
 926     txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
 927                      ref_best_rd, plane, bsize, uv_tx_size,
 928                      cpi->sf.use_fast_coef_costing);
 929     if (pnrate == INT_MAX)
 930       goto term;
 931     *rate += pnrate;
 932     *distortion += pndist;
 933     *sse += pnsse;
 934     *skippable &= pnskip;
 935   }
 936   return;
 937
 938   term:
 939   *rate = INT_MAX;
 940   *distortion = INT64_MAX;
 941   *sse = INT64_MAX;
 942   *skippable = 0;
 943   return;
 944 }
 945
 946 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 947                                        PICK_MODE_CONTEXT *ctx,
 948                                        int *rate, int *rate_tokenonly,
 949                                        int64_t *distortion, int *skippable,
 950                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
 951   MACROBLOCKD *xd = &x->e_mbd;
 952   PREDICTION_MODE mode;
 953   PREDICTION_MODE mode_selected = DC_PRED;
 954   int64_t best_rd = INT64_MAX, this_rd;
 955   int this_rate_tokenonly, this_rate, s;
 956   int64_t this_distortion, this_sse;
 957
 958   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
 959     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
 960       continue;
 961
 962     xd->mi[0].src_mi->mbmi.uv_mode = mode;
 963
 964     super_block_uvrd(cpi, x, &this_rate_tokenonly,
 965                      &this_distortion, &s, &this_sse, bsize, best_rd);
 966     if (this_rate_tokenonly == INT_MAX)
 967       continue;
 968     this_rate = this_rate_tokenonly +
 969                 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
 970     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 971
 972     if (this_rd < best_rd) {
 973       mode_selected   = mode;
 974       best_rd         = this_rd;
 975       *rate           = this_rate;
 976       *rate_tokenonly = this_rate_tokenonly;
 977       *distortion     = this_distortion;
 978       *skippable      = s;
 979       if (!x->select_tx_size)
 980         swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
 981     }
 982   }
 983
 984   xd->mi[0].src_mi->mbmi.uv_mode = mode_selected;
 985   return best_rd;
 986 }
 987
 988 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
 989                               int *rate, int *rate_tokenonly,
 990                               int64_t *distortion, int *skippable,
 991                               BLOCK_SIZE bsize) {
 992   const VP9_COMMON *cm = &cpi->common;
 993   int64_t unused;
 994
 995   x->e_mbd.mi[0].src_mi->mbmi.uv_mode = DC_PRED;
 996   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
 997                    skippable, &unused, bsize, INT64_MAX);
 998   *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
 999   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1000 }
1001
1002 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1003                                  BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1004                                  int *rate_uv, int *rate_uv_tokenonly,
1005                                  int64_t *dist_uv, int *skip_uv,
1006                                  PREDICTION_MODE *mode_uv) {
1007   MACROBLOCK *const x = &cpi->mb;
1008
1009   // Use an estimated rd for uv_intra based on DC_PRED if the
1010   // appropriate speed flag is set.
1011   if (cpi->sf.use_uv_intra_rd_estimate) {
1012     rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1013                    skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1014   // Else do a proper rd search for each possible transform size that may
1015   // be considered in the main rd loop.
1016   } else {
1017     rd_pick_intra_sbuv_mode(cpi, x, ctx,
1018                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1019                             bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1020   }
1021   *mode_uv = x->e_mbd.mi[0].src_mi->mbmi.uv_mode;
1022 }
1023
1024 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1025                        int mode_context) {
1026   assert(is_inter_mode(mode));
1027   return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1028 }
1029
1030 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1031                                 BLOCK_SIZE bsize,
1032                                 int_mv *frame_mv,
1033                                 int mi_row, int mi_col,
1034                                 int_mv single_newmv[MAX_REF_FRAMES],
1035                                 int *rate_mv);
1036
1037 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1038                                 PREDICTION_MODE mode, int_mv this_mv[2],
1039                                 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1040                                 int_mv seg_mvs[MAX_REF_FRAMES],
1041                                 int_mv *best_ref_mv[2], const int *mvjcost,
1042                                 int *mvcost[2]) {
1043   MODE_INFO *const mic = xd->mi[0].src_mi;
1044   const MB_MODE_INFO *const mbmi = &mic->mbmi;
1045   int thismvcost = 0;
1046   int idx, idy;
1047   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1048   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1049   const int is_compound = has_second_ref(mbmi);
1050
1051   switch (mode) {
1052     case NEWMV:
1053       this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1054       thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1055                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1056       if (is_compound) {
1057         this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1058         thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1059                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1060       }
1061       break;
1062     case NEARMV:
1063     case NEARESTMV:
1064       this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
1065       if (is_compound)
1066         this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
1067       break;
1068     case ZEROMV:
1069       this_mv[0].as_int = 0;
1070       if (is_compound)
1071         this_mv[1].as_int = 0;
1072       break;
1073     default:
1074       break;
1075   }
1076
1077   mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1078   if (is_compound)
1079     mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1080
1081   mic->bmi[i].as_mode = mode;
1082
1083   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1084     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1085       vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1086                  &mic->bmi[i], sizeof(mic->bmi[i]));
1087
1088   return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1089             thismvcost;
1090 }
1091
1092 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1093                                        MACROBLOCK *x,
1094                                        int64_t best_yrd,
1095                                        int i,
1096                                        int *labelyrate,
1097                                        int64_t *distortion, int64_t *sse,
1098                                        ENTROPY_CONTEXT *ta,
1099                                        ENTROPY_CONTEXT *tl,
1100                                        int mi_row, int mi_col) {
1101   int k;
1102   MACROBLOCKD *xd = &x->e_mbd;
1103   struct macroblockd_plane *const pd = &xd->plane[0];
1104   struct macroblock_plane *const p = &x->plane[0];
1105   MODE_INFO *const mi = xd->mi[0].src_mi;
1106   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1107   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1108   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1109   int idx, idy;
1110
1111   const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1112                                                              p->src.stride)];
1113   uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1114                                                         pd->dst.stride)];
1115   int64_t thisdistortion = 0, thissse = 0;
1116   int thisrate = 0, ref;
1117   const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1118   const int is_compound = has_second_ref(&mi->mbmi);
1119   const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1120
1121   for (ref = 0; ref < 1 + is_compound; ++ref) {
1122     const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1123                                                pd->pre[ref].stride)];
1124     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1125                               dst, pd->dst.stride,
1126                               &mi->bmi[i].as_mv[ref].as_mv,
1127                               &xd->block_refs[ref]->sf, width, height, ref,
1128                               kernel, MV_PRECISION_Q3,
1129                               mi_col * MI_SIZE + 4 * (i % 2),
1130                               mi_row * MI_SIZE + 4 * (i / 2));
1131   }
1132
1133   vp9_subtract_block(height, width,
1134                      raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1135                      src, p->src.stride,
1136                      dst, pd->dst.stride);
1137
1138   k = i;
1139   for (idy = 0; idy < height / 4; ++idy) {
1140     for (idx = 0; idx < width / 4; ++idx) {
1141       int64_t ssz, rd, rd1, rd2;
1142       tran_low_t* coeff;
1143
1144       k += (idy * 2 + idx);
1145       coeff = BLOCK_OFFSET(p->coeff, k);
1146       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1147                     coeff, 8);
1148       vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1149       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1150                                         16, &ssz);
1151       thissse += ssz;
1152       thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1153                               so->scan, so->neighbors,
1154                               cpi->sf.use_fast_coef_costing);
1155       rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1156       rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1157       rd = MIN(rd1, rd2);
1158       if (rd >= best_yrd)
1159         return INT64_MAX;
1160     }
1161   }
1162
1163   *distortion = thisdistortion >> 2;
1164   *labelyrate = thisrate;
1165   *sse = thissse >> 2;
1166
1167   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1168 }
1169
 1170 typedef struct {  // RD record for one sub-block under one inter mode (see BEST_SEG_INFO.rdstat).
 1171   int eobs;  // taken from p->eobs[] once the sub-block has been encoded
 1172   int brate;  // mode + MV rate from set_and_cost_bmi_mvs(); byrate is added in afterwards
 1173   int byrate;  // rate reported by encode_inter_mb_segment() (its labelyrate output)
 1174   int64_t bdist;  // distortion reported by encode_inter_mb_segment()
 1175   int64_t bsse;  // SSE reported by encode_inter_mb_segment()
 1176   int64_t brdcost;  // RD cost of this sub-block/mode; INT64_MAX until a valid cost is known
 1177   int_mv mvs[2];  // motion vector(s) tried for this mode, one per reference
 1178   ENTROPY_CONTEXT ta[2];  // working copy of the above entropy context
 1179   ENTROPY_CONTEXT tl[2];  // working copy of the left entropy context
 1180 } SEG_RDSTAT;
1181
 1182 typedef struct {  // State of one sub-8x8 inter mode search (one instance per filter_idx in bsi_buf).
 1183   int_mv *ref_mv[2];  // reference MVs: [0] = best_ref_mv, [1] = second_best_ref_mv
 1184   int_mv mvp;  // MV predictor seeding the NEWMV search; starts as *best_ref_mv
 1185
 1186   int64_t segment_rd;  // initialised to the caller's best_rd; RD budget for the search
 1187   int r;  // NOTE(review): presumably the total rate of the best segmentation - written outside this view, confirm
 1188   int64_t d;  // NOTE(review): presumably the matching total distortion - confirm
 1189   int64_t sse;  // NOTE(review): presumably the matching total SSE - confirm
 1190   int segment_yrate;  // NOTE(review): presumably the matching luma-only rate - confirm
 1191   PREDICTION_MODE modes[4];  // mode per 4x4 position; initialised to ZEROMV
 1192   SEG_RDSTAT rdstat[4][INTER_MODES];  // indexed [4x4 position][INTER_OFFSET(mode)]
 1193   int mvthresh;  // caller-supplied threshold; label_mv_thresh is derived from it
 1194 } BEST_SEG_INFO;
1195
1196 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1197   return (mv->row >> 3) < x->mv_row_min ||
1198          (mv->row >> 3) > x->mv_row_max ||
1199          (mv->col >> 3) < x->mv_col_min ||
1200          (mv->col >> 3) > x->mv_col_max;
1201 }
1202
1203 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1204   MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
1205   struct macroblock_plane *const p = &x->plane[0];
1206   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1207
1208   p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1209   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1210   pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1211                                                        pd->pre[0].stride)];
1212   if (has_second_ref(mbmi))
1213     pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1214                                                          pd->pre[1].stride)];
1215 }
1216
1217 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1218                                   struct buf_2d orig_pre[2]) {
1219   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0].src_mi->mbmi;
1220   x->plane[0].src = orig_src;
1221   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1222   if (has_second_ref(mbmi))
1223     x->e_mbd.plane[0].pre[1] = orig_pre[1];
1224 }
1225
1226 static INLINE int mv_has_subpel(const MV *mv) {
1227   return (mv->row & 0x0F) || (mv->col & 0x0F);
1228 }
1229
1230 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
1231 // TODO(aconverse): Find out if this is still productive then clean up or remove
1232 static int check_best_zero_mv(
1233     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
1234     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
1235     const MV_REFERENCE_FRAME ref_frames[2]) {
1236   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1237       frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1238       (ref_frames[1] == NONE ||
1239        frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1240     int rfc = mode_context[ref_frames[0]];
1241     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1242     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1243     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1244
1245     if (this_mode == NEARMV) {
1246       if (c1 > c3) return 0;
1247     } else if (this_mode == NEARESTMV) {
1248       if (c2 > c3) return 0;
1249     } else {
1250       assert(this_mode == ZEROMV);
1251       if (ref_frames[1] == NONE) {
1252         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1253             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1254           return 0;
1255       } else {
1256         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1257              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1258             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1259              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1260           return 0;
1261       }
1262     }
1263   }
1264   return 1;
1265 }
1266
1267 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
1268                                         const TileInfo * const tile,
1269                                         int_mv *best_ref_mv,
1270                                         int_mv *second_best_ref_mv,
1271                                         int64_t best_rd, int *returntotrate,
1272                                         int *returnyrate,
1273                                         int64_t *returndistortion,
1274                                         int *skippable, int64_t *psse,
1275                                         int mvthresh,
1276                                         int_mv seg_mvs[4][MAX_REF_FRAMES],
1277                                         BEST_SEG_INFO *bsi_buf, int filter_idx,
1278                                         int mi_row, int mi_col) {
1279   int i;
1280   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1281   MACROBLOCKD *xd = &x->e_mbd;
1282   MODE_INFO *mi = xd->mi[0].src_mi;
1283   MB_MODE_INFO *mbmi = &mi->mbmi;
1284   int mode_idx;
1285   int k, br = 0, idx, idy;
1286   int64_t bd = 0, block_sse = 0;
1287   PREDICTION_MODE this_mode;
1288   VP9_COMMON *cm = &cpi->common;
1289   struct macroblock_plane *const p = &x->plane[0];
1290   struct macroblockd_plane *const pd = &xd->plane[0];
1291   const int label_count = 4;
1292   int64_t this_segment_rd = 0;
1293   int label_mv_thresh;
1294   int segmentyrate = 0;
1295   const BLOCK_SIZE bsize = mbmi->sb_type;
1296   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1297   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1298   ENTROPY_CONTEXT t_above[2], t_left[2];
1299   int subpelmv = 1, have_ref = 0;
1300   const int has_second_rf = has_second_ref(mbmi);
1301   const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
1302
1303   vp9_zero(*bsi);
1304
1305   bsi->segment_rd = best_rd;
1306   bsi->ref_mv[0] = best_ref_mv;
1307   bsi->ref_mv[1] = second_best_ref_mv;
1308   bsi->mvp.as_int = best_ref_mv->as_int;
1309   bsi->mvthresh = mvthresh;
1310
1311   for (i = 0; i < 4; i++)
1312     bsi->modes[i] = ZEROMV;
1313
1314   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1315   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1316
1317   // 64 makes this threshold really big effectively
1318   // making it so that we very rarely check mvs on
1319   // segments.   setting this to 1 would make mv thresh
1320   // roughly equal to what it is for macroblocks
1321   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1322
1323   // Segmentation method overheads
1324   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1325     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1326       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1327       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1328       int_mv mode_mv[MB_MODE_COUNT][2];
1329       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1330       PREDICTION_MODE mode_selected = ZEROMV;
1331       int64_t best_rd = INT64_MAX;
1332       const int i = idy * 2 + idx;
1333       int ref;
1334
1335       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1336         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1337         frame_mv[ZEROMV][frame].as_int = 0;
1338         vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1339                                       &frame_mv[NEARESTMV][frame],
1340                                       &frame_mv[NEARMV][frame]);
1341       }
1342
1343       // search for the best motion vector on this segment
1344       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1345         const struct buf_2d orig_src = x->plane[0].src;
1346         struct buf_2d orig_pre[2];
1347
1348         mode_idx = INTER_OFFSET(this_mode);
1349         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1350         if (!(inter_mode_mask & (1 << this_mode)))
1351           continue;
1352
1353         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1354                                 this_mode, mbmi->ref_frame))
1355           continue;
1356
1357         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1358         vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1359                    sizeof(bsi->rdstat[i][mode_idx].ta));
1360         vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1361                    sizeof(bsi->rdstat[i][mode_idx].tl));
1362
1363         // motion search for newmv (single predictor case only)
1364         if (!has_second_rf && this_mode == NEWMV &&
1365             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1366           MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1367           int step_param = 0;
1368           int thissme, bestsme = INT_MAX;
1369           int sadpb = x->sadperbit4;
1370           MV mvp_full;
1371           int max_mv;
1372           int sad_list[5];
1373
 1374           /* Is the best so far sufficiently good that we can't justify doing
 1375            * a new motion search? */
1376           if (best_rd < label_mv_thresh)
1377             break;
1378
1379           if (cpi->oxcf.mode != BEST) {
1380             // use previous block's result as next block's MV predictor.
1381             if (i > 0) {
1382               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1383               if (i == 2)
1384                 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1385             }
1386           }
1387           if (i == 0)
1388             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1389           else
1390             max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1391
1392           if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1393             // Take wtd average of the step_params based on the last frame's
1394             // max mv magnitude and the best ref mvs of the current block for
1395             // the given reference.
1396             step_param = (vp9_init_search_range(max_mv) +
1397                               cpi->mv_step_param) / 2;
1398           } else {
1399             step_param = cpi->mv_step_param;
1400           }
1401
1402           mvp_full.row = bsi->mvp.as_mv.row >> 3;
1403           mvp_full.col = bsi->mvp.as_mv.col >> 3;
1404
1405           if (cpi->sf.adaptive_motion_search) {
1406             mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
1407             mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
1408             step_param = MAX(step_param, 8);
1409           }
1410
1411           // adjust src pointer for this block
1412           mi_buf_shift(x, i);
1413
1414           vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1415
1416           bestsme = vp9_full_pixel_search(
1417               cpi, x, bsize, &mvp_full, step_param, sadpb,
1418               cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,
1419               &bsi->ref_mv[0]->as_mv, new_mv,
1420               INT_MAX, 1);
1421
1422           // Should we do a full search (best quality only)
1423           if (cpi->oxcf.mode == BEST) {
1424             int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1425             /* Check if mvp_full is within the range. */
1426             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1427                      x->mv_row_min, x->mv_row_max);
1428             thissme = cpi->full_search_sad(x, &mvp_full,
1429                                            sadpb, 16, &cpi->fn_ptr[bsize],
1430                                            &bsi->ref_mv[0]->as_mv,
1431                                            &best_mv->as_mv);
1432             sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;
1433             if (thissme < bestsme) {
1434               bestsme = thissme;
1435               *new_mv = best_mv->as_mv;
1436             } else {
1437               // The full search result is actually worse so re-instate the
1438               // previous best vector
1439               best_mv->as_mv = *new_mv;
1440             }
1441           }
1442
1443           if (bestsme < INT_MAX) {
1444             int distortion;
1445             cpi->find_fractional_mv_step(
1446                 x,
1447                 new_mv,
1448                 &bsi->ref_mv[0]->as_mv,
1449                 cm->allow_high_precision_mv,
1450                 x->errorperbit, &cpi->fn_ptr[bsize],
1451                 cpi->sf.mv.subpel_force_stop,
1452                 cpi->sf.mv.subpel_iters_per_step,
1453                 cond_sad_list(cpi, sad_list),
1454                 x->nmvjointcost, x->mvcost,
1455                 &distortion,
1456                 &x->pred_sse[mbmi->ref_frame[0]],
1457                 NULL, 0, 0);
1458
1459             // save motion search result for use in compound prediction
1460             seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1461           }
1462
1463           if (cpi->sf.adaptive_motion_search)
1464             x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
1465
1466           // restore src pointers
1467           mi_buf_restore(x, orig_src, orig_pre);
1468         }
1469
1470         if (has_second_rf) {
1471           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1472               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1473             continue;
1474         }
1475
1476         if (has_second_rf && this_mode == NEWMV &&
1477             mbmi->interp_filter == EIGHTTAP) {
1478           // adjust src pointers
1479           mi_buf_shift(x, i);
1480           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1481             int rate_mv;
1482             joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1483                                 mi_row, mi_col, seg_mvs[i],
1484                                 &rate_mv);
1485             seg_mvs[i][mbmi->ref_frame[0]].as_int =
1486                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1487             seg_mvs[i][mbmi->ref_frame[1]].as_int =
1488                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1489           }
1490           // restore src pointers
1491           mi_buf_restore(x, orig_src, orig_pre);
1492         }
1493
1494         bsi->rdstat[i][mode_idx].brate =
1495             set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode],
1496                                  frame_mv, seg_mvs[i], bsi->ref_mv,
1497                                  x->nmvjointcost, x->mvcost);
1498
1499         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1500           bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1501               mode_mv[this_mode][ref].as_int;
1502           if (num_4x4_blocks_wide > 1)
1503             bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1504                 mode_mv[this_mode][ref].as_int;
1505           if (num_4x4_blocks_high > 1)
1506             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1507                 mode_mv[this_mode][ref].as_int;
1508         }
1509
1510         // Trap vectors that reach beyond the UMV borders
1511         if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1512             (has_second_rf &&
1513              mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1514           continue;
1515
1516         if (filter_idx > 0) {
1517           BEST_SEG_INFO *ref_bsi = bsi_buf;
1518           subpelmv = 0;
1519           have_ref = 1;
1520
1521           for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1522             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1523             have_ref &= mode_mv[this_mode][ref].as_int ==
1524                 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1525           }
1526
1527           if (filter_idx > 1 && !subpelmv && !have_ref) {
1528             ref_bsi = bsi_buf + 1;
1529             have_ref = 1;
1530             for (ref = 0; ref < 1 + has_second_rf; ++ref)
1531               have_ref &= mode_mv[this_mode][ref].as_int ==
1532                   ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1533           }
1534
1535           if (!subpelmv && have_ref &&
1536               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1537             vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1538                        sizeof(SEG_RDSTAT));
1539             if (num_4x4_blocks_wide > 1)
1540               bsi->rdstat[i + 1][mode_idx].eobs =
1541                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
1542             if (num_4x4_blocks_high > 1)
1543               bsi->rdstat[i + 2][mode_idx].eobs =
1544                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
1545
1546             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1547               mode_selected = this_mode;
1548               best_rd = bsi->rdstat[i][mode_idx].brdcost;
1549             }
1550             continue;
1551           }
1552         }
1553
1554         bsi->rdstat[i][mode_idx].brdcost =
1555             encode_inter_mb_segment(cpi, x,
1556                                     bsi->segment_rd - this_segment_rd, i,
1557                                     &bsi->rdstat[i][mode_idx].byrate,
1558                                     &bsi->rdstat[i][mode_idx].bdist,
1559                                     &bsi->rdstat[i][mode_idx].bsse,
1560                                     bsi->rdstat[i][mode_idx].ta,
1561                                     bsi->rdstat[i][mode_idx].tl,
1562                                     mi_row, mi_col);
1563         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1564           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1565                                             bsi->rdstat[i][mode_idx].brate, 0);
1566           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1567           bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
1568           if (num_4x4_blocks_wide > 1)
1569             bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
1570           if (num_4x4_blocks_high > 1)
1571             bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
1572         }
1573
1574         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1575           mode_selected = this_mode;
1576           best_rd = bsi->rdstat[i][mode_idx].brdcost;
1577         }
1578       } /*for each 4x4 mode*/
1579
1580       if (best_rd == INT64_MAX) {
1581         int iy, midx;
1582         for (iy = i + 1; iy < 4; ++iy)
1583           for (midx = 0; midx < INTER_MODES; ++midx)
1584             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1585         bsi->segment_rd = INT64_MAX;
1586         return INT64_MAX;;
1587       }
1588
1589       mode_idx = INTER_OFFSET(mode_selected);
1590       vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
1591       vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
1592
1593       set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected],
1594                            frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
1595                            x->mvcost);
1596
1597       br += bsi->rdstat[i][mode_idx].brate;
1598       bd += bsi->rdstat[i][mode_idx].bdist;
1599       block_sse += bsi->rdstat[i][mode_idx].bsse;
1600       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
1601       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
1602
1603       if (this_segment_rd > bsi->segment_rd) {
1604         int iy, midx;
1605         for (iy = i + 1; iy < 4; ++iy)
1606           for (midx = 0; midx < INTER_MODES; ++midx)
1607             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1608         bsi->segment_rd = INT64_MAX;
1609         return INT64_MAX;;
1610       }
1611     }
1612   } /* for each label */
1613
1614   bsi->r = br;
1615   bsi->d = bd;
1616   bsi->segment_yrate = segmentyrate;
1617   bsi->segment_rd = this_segment_rd;
1618   bsi->sse = block_sse;
1619
1620   // update the coding decisions
1621   for (k = 0; k < 4; ++k)
1622     bsi->modes[k] = mi->bmi[k].as_mode;
1623
1624   if (bsi->segment_rd > best_rd)
1625     return INT64_MAX;
1626   /* set it to the best */
1627   for (i = 0; i < 4; i++) {
1628     mode_idx = INTER_OFFSET(bsi->modes[i]);
1629     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
1630     if (has_second_ref(mbmi))
1631       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
1632     x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
1633     mi->bmi[i].as_mode = bsi->modes[i];
1634   }
1635
1636   /*
1637    * used to set mbmi->mv.as_int
1638    */
1639   *returntotrate = bsi->r;
1640   *returndistortion = bsi->d;
1641   *returnyrate = bsi->segment_yrate;
1642   *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
1643   *psse = bsi->sse;
1644   mbmi->mode = bsi->modes[3];
1645
1646   return bsi->segment_rd;
1647 }
1648
1649 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
1650                                      const MACROBLOCKD *xd,
1651                                      int segment_id,
1652                                      unsigned int *ref_costs_single,
1653                                      unsigned int *ref_costs_comp,
1654                                      vp9_prob *comp_mode_p) {
1655   int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
1656                                              SEG_LVL_REF_FRAME);
1657   if (seg_ref_active) {
1658     vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
1659     vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
1660     *comp_mode_p = 128;
1661   } else {
1662     vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
1663     vp9_prob comp_inter_p = 128;
1664
1665     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
1666       comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
1667       *comp_mode_p = comp_inter_p;
1668     } else {
1669       *comp_mode_p = 128;
1670     }
1671
1672     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
1673
1674     if (cm->reference_mode != COMPOUND_REFERENCE) {
1675       vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
1676       vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
1677       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1678
1679       if (cm->reference_mode == REFERENCE_MODE_SELECT)
1680         base_cost += vp9_cost_bit(comp_inter_p, 0);
1681
1682       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
1683           ref_costs_single[ALTREF_FRAME] = base_cost;
1684       ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
1685       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1686       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1687       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
1688       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
1689     } else {
1690       ref_costs_single[LAST_FRAME]   = 512;
1691       ref_costs_single[GOLDEN_FRAME] = 512;
1692       ref_costs_single[ALTREF_FRAME] = 512;
1693     }
1694     if (cm->reference_mode != SINGLE_REFERENCE) {
1695       vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
1696       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1697
1698       if (cm->reference_mode == REFERENCE_MODE_SELECT)
1699         base_cost += vp9_cost_bit(comp_inter_p, 1);
1700
1701       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
1702       ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
1703     } else {
1704       ref_costs_comp[LAST_FRAME]   = 512;
1705       ref_costs_comp[GOLDEN_FRAME] = 512;
1706     }
1707   }
1708 }
1709
1710 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1711                          int mode_index,
1712                          int64_t comp_pred_diff[REFERENCE_MODES],
1713                          const int64_t tx_size_diff[TX_MODES],
1714                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
1715                          int skippable) {
1716   MACROBLOCKD *const xd = &x->e_mbd;
1717
1718   // Take a snapshot of the coding context so it can be
1719   // restored if we decide to encode this way
1720   ctx->skip = x->skip;
1721   ctx->skippable = skippable;
1722   ctx->best_mode_index = mode_index;
1723   ctx->mic = *xd->mi[0].src_mi;
1724   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
1725   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
1726   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
1727
1728   vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
1729   vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
1730              sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
1731 }
1732
1733 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
1734                                const TileInfo *const tile,
1735                                MV_REFERENCE_FRAME ref_frame,
1736                                BLOCK_SIZE block_size,
1737                                int mi_row, int mi_col,
1738                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
1739                                int_mv frame_near_mv[MAX_REF_FRAMES],
1740                                struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
1741   const VP9_COMMON *cm = &cpi->common;
1742   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
1743   MACROBLOCKD *const xd = &x->e_mbd;
1744   MODE_INFO *const mi = xd->mi[0].src_mi;
1745   int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
1746   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
1747
1748   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
1749   // use the UV scaling factors.
1750   vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
1751
1752   // Gets an initial list of candidate vectors from neighbours and orders them
1753   vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
1754
1755   // Candidate refinement carried out at encoder and decoder
1756   vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
1757                         &frame_nearest_mv[ref_frame],
1758                         &frame_near_mv[ref_frame]);
1759
1760   // Further refinement that is encode side only to test the top few candidates
1761   // in full and choose the best as the centre point for subsequent searches.
1762   // The current implementation doesn't support scaling.
1763   if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
1764     vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
1765                 ref_frame, block_size);
1766 }
1767
1768 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1769                                  BLOCK_SIZE bsize,
1770                                  int mi_row, int mi_col,
1771                                  int_mv *tmp_mv, int *rate_mv) {
1772   MACROBLOCKD *xd = &x->e_mbd;
1773   const VP9_COMMON *cm = &cpi->common;
1774   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
1775   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
1776   int bestsme = INT_MAX;
1777   int step_param;
1778   int sadpb = x->sadperbit16;
1779   MV mvp_full;
1780   int ref = mbmi->ref_frame[0];
1781   MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
1782
1783   int tmp_col_min = x->mv_col_min;
1784   int tmp_col_max = x->mv_col_max;
1785   int tmp_row_min = x->mv_row_min;
1786   int tmp_row_max = x->mv_row_max;
1787   int sad_list[5];
1788
1789   const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
1790                                                                         ref);
1791
1792   MV pred_mv[3];
1793   pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
1794   pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
1795   pred_mv[2] = x->pred_mv[ref];
1796
1797   if (scaled_ref_frame) {
1798     int i;
1799     // Swap out the reference frame for a version that's been scaled to
1800     // match the resolution of the current frame, allowing the existing
1801     // motion search code to be used without additional modifications.
1802     for (i = 0; i < MAX_MB_PLANE; i++)
1803       backup_yv12[i] = xd->plane[i].pre[0];
1804
1805     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
1806   }
1807
1808   vp9_set_mv_search_range(x, &ref_mv);
1809
1810   // Work out the size of the first step in the mv step search.
1811   // 0 here is maximum length first step. 1 is MAX >> 1 etc.
1812   if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1813     // Take wtd average of the step_params based on the last frame's
1814     // max mv magnitude and that based on the best ref mvs of the current
1815     // block for the given reference.
1816     step_param = (vp9_init_search_range(x->max_mv_context[ref]) +
1817                     cpi->mv_step_param) / 2;
1818   } else {
1819     step_param = cpi->mv_step_param;
1820   }
1821
1822   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
1823     int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
1824                                                        b_width_log2(bsize)));
1825     step_param = MAX(step_param, boffset);
1826   }
1827
1828   if (cpi->sf.adaptive_motion_search) {
1829     int bwl = b_width_log2(bsize);
1830     int bhl = b_height_log2(bsize);
1831     int i;
1832     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
1833
1834     if (tlevel < 5)
1835       step_param += 2;
1836
1837     for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
1838       if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
1839         x->pred_mv[ref].row = 0;
1840         x->pred_mv[ref].col = 0;
1841         tmp_mv->as_int = INVALID_MV;
1842
1843         if (scaled_ref_frame) {
1844           int i;
1845           for (i = 0; i < MAX_MB_PLANE; i++)
1846             xd->plane[i].pre[0] = backup_yv12[i];
1847         }
1848         return;
1849       }
1850     }
1851   }
1852
1853   mvp_full = pred_mv[x->mv_best_ref_index[ref]];
1854
1855   mvp_full.col >>= 3;
1856   mvp_full.row >>= 3;
1857
1858   bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
1859                                   cond_sad_list(cpi, sad_list),
1860                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
1861
1862   x->mv_col_min = tmp_col_min;
1863   x->mv_col_max = tmp_col_max;
1864   x->mv_row_min = tmp_row_min;
1865   x->mv_row_max = tmp_row_max;
1866
1867   if (bestsme < INT_MAX) {
1868     int dis;  /* TODO: use dis in distortion calculation later. */
1869     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
1870                                  cm->allow_high_precision_mv,
1871                                  x->errorperbit,
1872                                  &cpi->fn_ptr[bsize],
1873                                  cpi->sf.mv.subpel_force_stop,
1874                                  cpi->sf.mv.subpel_iters_per_step,
1875                                  cond_sad_list(cpi, sad_list),
1876                                  x->nmvjointcost, x->mvcost,
1877                                  &dis, &x->pred_sse[ref], NULL, 0, 0);
1878   }
1879   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
1880                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
1881
1882   if (cpi->sf.adaptive_motion_search)
1883     x->pred_mv[ref] = tmp_mv->as_mv;
1884
1885   if (scaled_ref_frame) {
1886     int i;
1887     for (i = 0; i < MAX_MB_PLANE; i++)
1888       xd->plane[i].pre[0] = backup_yv12[i];
1889   }
1890 }
1891
1892 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1893                                 BLOCK_SIZE bsize,
1894                                 int_mv *frame_mv,
1895                                 int mi_row, int mi_col,
1896                                 int_mv single_newmv[MAX_REF_FRAMES],
1897                                 int *rate_mv) {
1898   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
1899   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
1900   MACROBLOCKD *xd = &x->e_mbd;
1901   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
1902   const int refs[2] = { mbmi->ref_frame[0],
1903                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1904   int_mv ref_mv[2];
1905   int ite, ref;
1906   // Prediction buffer from second frame.
1907   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
1908   const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
1909
1910   // Do joint motion search in compound mode to get more accurate mv.
1911   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
1912   struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
1913   int last_besterr[2] = {INT_MAX, INT_MAX};
1914   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
1915     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
1916     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
1917   };
1918
1919   for (ref = 0; ref < 2; ++ref) {
1920     ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
1921
1922     if (scaled_ref_frame[ref]) {
1923       int i;
1924       // Swap out the reference frame for a version that's been scaled to
1925       // match the resolution of the current frame, allowing the existing
1926       // motion search code to be used without additional modifications.
1927       for (i = 0; i < MAX_MB_PLANE; i++)
1928         backup_yv12[ref][i] = xd->plane[i].pre[ref];
1929       vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
1930                            NULL);
1931     }
1932
1933     frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
1934   }
1935
1936   // Allow joint search multiple times iteratively for each ref frame
1937   // and break out the search loop if it couldn't find better mv.
1938   for (ite = 0; ite < 4; ite++) {
1939     struct buf_2d ref_yv12[2];
1940     int bestsme = INT_MAX;
1941     int sadpb = x->sadperbit16;
1942     MV tmp_mv;
1943     int search_range = 3;
1944
1945     int tmp_col_min = x->mv_col_min;
1946     int tmp_col_max = x->mv_col_max;
1947     int tmp_row_min = x->mv_row_min;
1948     int tmp_row_max = x->mv_row_max;
1949     int id = ite % 2;
1950
1951     // Initialized here because of compiler problem in Visual Studio.
1952     ref_yv12[0] = xd->plane[0].pre[0];
1953     ref_yv12[1] = xd->plane[0].pre[1];
1954
1955     // Get pred block from second frame.
1956     vp9_build_inter_predictor(ref_yv12[!id].buf,
1957                               ref_yv12[!id].stride,
1958                               second_pred, pw,
1959                               &frame_mv[refs[!id]].as_mv,
1960                               &xd->block_refs[!id]->sf,
1961                               pw, ph, 0,
1962                               kernel, MV_PRECISION_Q3,
1963                               mi_col * MI_SIZE, mi_row * MI_SIZE);
1964
1965     // Compound motion search on first ref frame.
1966     if (id)
1967       xd->plane[0].pre[0] = ref_yv12[id];
1968     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
1969
1970     // Use mv result from single mode as mvp.
1971     tmp_mv = frame_mv[refs[id]].as_mv;
1972
1973     tmp_mv.col >>= 3;
1974     tmp_mv.row >>= 3;
1975
1976     // Small-range full-pixel motion search
1977     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
1978                                        search_range,
1979                                        &cpi->fn_ptr[bsize],
1980                                        &ref_mv[id].as_mv, second_pred);
1981     if (bestsme < INT_MAX)
1982       bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
1983                                       second_pred, &cpi->fn_ptr[bsize], 1);
1984
1985     x->mv_col_min = tmp_col_min;
1986     x->mv_col_max = tmp_col_max;
1987     x->mv_row_min = tmp_row_min;
1988     x->mv_row_max = tmp_row_max;
1989
1990     if (bestsme < INT_MAX) {
1991       int dis; /* TODO: use dis in distortion calculation later. */
1992       unsigned int sse;
1993       bestsme = cpi->find_fractional_mv_step(
1994           x, &tmp_mv,
1995           &ref_mv[id].as_mv,
1996           cpi->common.allow_high_precision_mv,
1997           x->errorperbit,
1998           &cpi->fn_ptr[bsize],
1999           0, cpi->sf.mv.subpel_iters_per_step,
2000           NULL,
2001           x->nmvjointcost, x->mvcost,
2002           &dis, &sse, second_pred,
2003           pw, ph);
2004     }
2005
2006     if (id)
2007       xd->plane[0].pre[0] = scaled_first_yv12;
2008
2009     if (bestsme < last_besterr[id]) {
2010       frame_mv[refs[id]].as_mv = tmp_mv;
2011       last_besterr[id] = bestsme;
2012     } else {
2013       break;
2014     }
2015   }
2016
2017   *rate_mv = 0;
2018
2019   for (ref = 0; ref < 2; ++ref) {
2020     if (scaled_ref_frame[ref]) {
2021       // restore the predictor
2022       int i;
2023       for (i = 0; i < MAX_MB_PLANE; i++)
2024         xd->plane[i].pre[ref] = backup_yv12[ref][i];
2025     }
2026
2027     *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2028                                 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2029                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2030   }
2031
2032   vpx_free(second_pred);
2033 }
2034
2035 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2036                                    uint8_t *orig_dst[MAX_MB_PLANE],
2037                                    int orig_dst_stride[MAX_MB_PLANE]) {
2038   int i;
2039   for (i = 0; i < MAX_MB_PLANE; i++) {
2040     xd->plane[i].dst.buf = orig_dst[i];
2041     xd->plane[i].dst.stride = orig_dst_stride[i];
2042   }
2043 }
2044
2045 static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
2046                                     BLOCK_SIZE bsize, int *rate2,
2047                                     int64_t *distortion, int64_t *distortion_uv,
2048                                     int *disable_skip) {
2049   VP9_COMMON *cm = &cpi->common;
2050   MACROBLOCKD *xd = &x->e_mbd;
2051   const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2052   const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2053   unsigned int var, sse;
2054   // Skipping threshold for ac.
2055   unsigned int thresh_ac;
2056   // Skipping threshold for dc
2057   unsigned int thresh_dc;
2058
2059   var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2060                                xd->plane[0].dst.buf,
2061                                xd->plane[0].dst.stride, &sse);
2062
2063   if (x->encode_breakout > 0) {
2064     // Set a maximum for threshold to avoid big PSNR loss in low bitrate
2065     // case. Use extreme low threshold for static frames to limit skipping.
2066     const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2067                                      ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2068     // The encode_breakout input
2069     const unsigned int min_thresh =
2070         MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2071
2072     // Calculate threshold according to dequant value.
2073     thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2074     thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2075
2076     // Adjust threshold according to partition size.
2077     thresh_ac >>= 8 - (b_width_log2(bsize) +
2078         b_height_log2(bsize));
2079     thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2080   } else {
2081     thresh_ac = 0;
2082     thresh_dc = 0;
2083   }
2084
2085   // Y skipping condition checking
2086   if (sse < thresh_ac || sse == 0) {
2087     // dc skipping checking
2088     if ((sse - var) < thresh_dc || sse == var) {
2089       unsigned int sse_u, sse_v;
2090       unsigned int var_u, var_v;
2091
2092       var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2093                                       x->plane[1].src.stride,
2094                                       xd->plane[1].dst.buf,
2095                                       xd->plane[1].dst.stride, &sse_u);
2096
2097       // U skipping condition checking
2098       if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2099           (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2100         var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2101                                         x->plane[2].src.stride,
2102                                         xd->plane[2].dst.buf,
2103                                         xd->plane[2].dst.stride, &sse_v);
2104
2105         // V skipping condition checking
2106         if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2107             (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2108           x->skip = 1;
2109
2110           // The cost of skip bit needs to be added.
2111           *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2112
2113           // Scaling factor for SSE from spatial domain to frequency domain
2114           // is 16. Adjust distortion accordingly.
2115           *distortion_uv = (sse_u + sse_v) << 4;
2116           *distortion = (sse << 4) + *distortion_uv;
2117
2118           *disable_skip = 1;
2119         }
2120       }
2121     }
2122   }
2123 }
2124
2125 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2126                                  BLOCK_SIZE bsize,
2127                                  int64_t txfm_cache[],
2128                                  int *rate2, int64_t *distortion,
2129                                  int *skippable,
2130                                  int *rate_y, int64_t *distortion_y,
2131                                  int *rate_uv, int64_t *distortion_uv,
2132                                  int *disable_skip,
2133                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
2134                                  int mi_row, int mi_col,
2135                                  int_mv single_newmv[MAX_REF_FRAMES],
2136                                  INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2137                                  int (*single_skippable)[MAX_REF_FRAMES],
2138                                  int64_t *psse,
2139                                  const int64_t ref_best_rd) {
2140   VP9_COMMON *cm = &cpi->common;
2141   RD_OPT *rd_opt = &cpi->rd;
2142   MACROBLOCKD *xd = &x->e_mbd;
2143   MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi;
2144   const int is_comp_pred = has_second_ref(mbmi);
2145   const int this_mode = mbmi->mode;
2146   int_mv *frame_mv = mode_mv[this_mode];
2147   int i;
2148   int refs[2] = { mbmi->ref_frame[0],
2149     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2150   int_mv cur_mv[2];
2151   int64_t this_rd = 0;
2152   DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2153   int pred_exists = 0;
2154   int intpel_mv;
2155   int64_t rd, tmp_rd, best_rd = INT64_MAX;
2156   int best_needs_copy = 0;
2157   uint8_t *orig_dst[MAX_MB_PLANE];
2158   int orig_dst_stride[MAX_MB_PLANE];
2159   int rs = 0;
2160   INTERP_FILTER best_filter = SWITCHABLE;
2161   uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
2162   int64_t bsse[MAX_MB_PLANE << 2] = {0};
2163
2164   int bsl = mi_width_log2_lookup[bsize];
2165   int pred_filter_search = cpi->sf.cb_pred_filter_search ?
2166       (((mi_row + mi_col) >> bsl) +
2167        get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
2168
2169   if (pred_filter_search) {
2170     INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2171     if (xd->up_available)
2172       af = xd->mi[-xd->mi_stride].src_mi->mbmi.interp_filter;
2173     if (xd->left_available)
2174       lf = xd->mi[-1].src_mi->mbmi.interp_filter;
2175
2176     if ((this_mode != NEWMV) || (af == lf))
2177       best_filter = af;
2178   }
2179
2180   if (is_comp_pred) {
2181     if (frame_mv[refs[0]].as_int == INVALID_MV ||
2182         frame_mv[refs[1]].as_int == INVALID_MV)
2183       return INT64_MAX;
2184
2185     if (cpi->sf.adaptive_mode_search) {
2186       if (single_filter[this_mode][refs[0]] ==
2187           single_filter[this_mode][refs[1]])
2188         best_filter = single_filter[this_mode][refs[0]];
2189     }
2190   }
2191
2192   if (this_mode == NEWMV) {
2193     int rate_mv;
2194     if (is_comp_pred) {
2195       // Initialize mv using single prediction mode result.
2196       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2197       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2198
2199       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2200         joint_motion_search(cpi, x, bsize, frame_mv,
2201                             mi_row, mi_col, single_newmv, &rate_mv);
2202       } else {
2203         rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2204                                    &mbmi->ref_mvs[refs[0]][0].as_mv,
2205                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2206         rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2207                                    &mbmi->ref_mvs[refs[1]][0].as_mv,
2208                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2209       }
2210       *rate2 += rate_mv;
2211     } else {
2212       int_mv tmp_mv;
2213       single_motion_search(cpi, x, bsize, mi_row, mi_col,
2214                            &tmp_mv, &rate_mv);
2215       if (tmp_mv.as_int == INVALID_MV)
2216         return INT64_MAX;
2217       *rate2 += rate_mv;
2218       frame_mv[refs[0]].as_int =
2219           xd->mi[0].src_mi->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2220       single_newmv[refs[0]].as_int = tmp_mv.as_int;
2221     }
2222   }
2223
2224   for (i = 0; i < is_comp_pred + 1; ++i) {
2225     cur_mv[i] = frame_mv[refs[i]];
2226     // Clip "next_nearest" so that it does not extend too far out of image
2227     if (this_mode != NEWMV)
2228       clamp_mv2(&cur_mv[i].as_mv, xd);
2229
2230     if (mv_check_bounds(x, &cur_mv[i].as_mv))
2231       return INT64_MAX;
2232     mbmi->mv[i].as_int = cur_mv[i].as_int;
2233   }
2234
2235   // do first prediction into the destination buffer. Do the next
2236   // prediction into a temporary buffer. Then keep track of which one
2237   // of these currently holds the best predictor, and use the other
2238   // one for future predictions. In the end, copy from tmp_buf to
2239   // dst if necessary.
2240   for (i = 0; i < MAX_MB_PLANE; i++) {
2241     orig_dst[i] = xd->plane[i].dst.buf;
2242     orig_dst_stride[i] = xd->plane[i].dst.stride;
2243   }
2244
2245   /* We don't include the cost of the second reference here, because there
2246    * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2247    * words if you present them in that order, the second one is always known
2248    * if the first is known */
2249   *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2250
2251   if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
2252       mbmi->mode != NEARESTMV)
2253     return INT64_MAX;
2254
2255   pred_exists = 0;
2256   // Are all MVs integer pel for Y and UV
2257   intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2258   if (is_comp_pred)
2259     intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2260
2261   // Search for best switchable filter by checking the variance of
2262   // pred error irrespective of whether the filter will be used
2263   rd_opt->mask_filter = 0;
2264   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2265     rd_opt->filter_cache[i] = INT64_MAX;
2266
2267   if (cm->interp_filter != BILINEAR) {
2268     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2269       best_filter = EIGHTTAP;
2270     } else if (best_filter == SWITCHABLE) {
2271       int newbest;
2272       int tmp_rate_sum = 0;
2273       int64_t tmp_dist_sum = 0;
2274
2275       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2276         int j;
2277         int64_t rs_rd;
2278         mbmi->interp_filter = i;
2279         rs = vp9_get_switchable_rate(cpi);
2280         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2281
2282         if (i > 0 && intpel_mv) {
2283           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2284           rd_opt->filter_cache[i] = rd;
2285           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2286               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2287           if (cm->interp_filter == SWITCHABLE)
2288             rd += rs_rd;
2289           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2290         } else {
2291           int rate_sum = 0;
2292           int64_t dist_sum = 0;
2293           if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
2294               (cpi->sf.interp_filter_search_mask & (1 << i))) {
2295             rate_sum = INT_MAX;
2296             dist_sum = INT64_MAX;
2297             continue;
2298           }
2299
2300           if ((cm->interp_filter == SWITCHABLE &&
2301                (!i || best_needs_copy)) ||
2302               (cm->interp_filter != SWITCHABLE &&
2303                (cm->interp_filter == mbmi->interp_filter ||
2304                 (i == 0 && intpel_mv)))) {
2305             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2306           } else {
2307             for (j = 0; j < MAX_MB_PLANE; j++) {
2308               xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2309               xd->plane[j].dst.stride = 64;
2310             }
2311           }
2312           vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2313           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2314
2315           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2316           rd_opt->filter_cache[i] = rd;
2317           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2318               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2319           if (cm->interp_filter == SWITCHABLE)
2320             rd += rs_rd;
2321           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2322
2323           if (i == 0 && intpel_mv) {
2324             tmp_rate_sum = rate_sum;
2325             tmp_dist_sum = dist_sum;
2326           }
2327         }
2328
2329         if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2330           if (rd / 2 > ref_best_rd) {
2331             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2332             return INT64_MAX;
2333           }
2334         }
2335         newbest = i == 0 || rd < best_rd;
2336
2337         if (newbest) {
2338           best_rd = rd;
2339           best_filter = mbmi->interp_filter;
2340           if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2341             best_needs_copy = !best_needs_copy;
2342           vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2343           vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2344         }
2345
2346         if ((cm->interp_filter == SWITCHABLE && newbest) ||
2347             (cm->interp_filter != SWITCHABLE &&
2348              cm->interp_filter == mbmi->interp_filter)) {
2349           pred_exists = 1;
2350           tmp_rd = best_rd;
2351         }
2352       }
2353       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2354     }
2355   }
2356   // Set the appropriate filter
2357   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2358       cm->interp_filter : best_filter;
2359   rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2360
2361   if (pred_exists) {
2362     if (best_needs_copy) {
2363       // again temporarily set the buffers to local memory to prevent a memcpy
2364       for (i = 0; i < MAX_MB_PLANE; i++) {
2365         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2366         xd->plane[i].dst.stride = 64;
2367       }
2368     }
2369     rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
2370   } else {
2371     int tmp_rate;
2372     int64_t tmp_dist;
2373     // Handles the special case when a filter that is not in the
2374     // switchable list (ex. bilinear) is indicated at the frame level, or
2375     // skip condition holds.
2376     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2377     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2378     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2379     vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2380     vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2381   }
2382
2383   if (!is_comp_pred)
2384     single_filter[this_mode][refs[0]] = mbmi->interp_filter;
2385
2386   if (cpi->sf.adaptive_mode_search)
2387     if (is_comp_pred)
2388       if (single_skippable[this_mode][refs[0]] &&
2389           single_skippable[this_mode][refs[1]])
2390         vpx_memset(skip_txfm, 1, sizeof(skip_txfm));
2391
2392   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2393     // if current pred_error modeled rd is substantially more than the best
2394     // so far, do not bother doing full rd
2395     if (rd / 2 > ref_best_rd) {
2396       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2397       return INT64_MAX;
2398     }
2399   }
2400
2401   if (cm->interp_filter == SWITCHABLE)
2402     *rate2 += rs;
2403
2404   if (!is_comp_pred) {
2405     if (cpi->allow_encode_breakout)
2406       rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,
2407                               disable_skip);
2408   }
2409
2410   vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
2411   vpx_memcpy(x->bsse, bsse, sizeof(bsse));
2412
2413   if (!x->skip) {
2414     int skippable_y, skippable_uv;
2415     int64_t sseuv = INT64_MAX;
2416     int64_t rdcosty = INT64_MAX;
2417
2418     // Y cost and distortion
2419     vp9_subtract_plane(x, bsize, 0);
2420     super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2421                     bsize, txfm_cache, ref_best_rd);
2422
2423     if (*rate_y == INT_MAX) {
2424       *rate2 = INT_MAX;
2425       *distortion = INT64_MAX;
2426       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2427       return INT64_MAX;
2428     }
2429
2430     *rate2 += *rate_y;
2431     *distortion += *distortion_y;
2432
2433     rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2434     rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2435
2436     super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2437                      bsize, ref_best_rd - rdcosty);
2438     if (*rate_uv == INT_MAX) {
2439       *rate2 = INT_MAX;
2440       *distortion = INT64_MAX;
2441       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2442       return INT64_MAX;
2443     }
2444
2445     *psse += sseuv;
2446     *rate2 += *rate_uv;
2447     *distortion += *distortion_uv;
2448     *skippable = skippable_y && skippable_uv;
2449   }
2450
2451   if (!is_comp_pred)
2452     single_skippable[this_mode][refs[0]] = *skippable;
2453
2454   restore_dst_buf(xd, orig_dst, orig_dst_stride);
2455   return this_rd;  // if 0, this will be re-calculated by caller
2456 }
2457
2458 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2459                                int *returnrate, int64_t *returndist,
2460                                BLOCK_SIZE bsize,
2461                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2462   VP9_COMMON *const cm = &cpi->common;
2463   MACROBLOCKD *const xd = &x->e_mbd;
2464   struct macroblockd_plane *const pd = xd->plane;
2465   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2466   int y_skip = 0, uv_skip = 0;
2467   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2468   TX_SIZE max_uv_tx_size;
2469   x->skip_encode = 0;
2470   ctx->skip = 0;
2471   xd->mi[0].src_mi->mbmi.ref_frame[0] = INTRA_FRAME;
2472
2473   if (bsize >= BLOCK_8X8) {
2474     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2475                                &dist_y, &y_skip, bsize, tx_cache,
2476                                best_rd) >= best_rd) {
2477       *returnrate = INT_MAX;
2478       return;
2479     }
2480     max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
2481                                          pd[1].subsampling_x,
2482                                          pd[1].subsampling_y);
2483     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2484                             &dist_uv, &uv_skip, bsize, max_uv_tx_size);
2485   } else {
2486     y_skip = 0;
2487     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2488                                      &dist_y, best_rd) >= best_rd) {
2489       *returnrate = INT_MAX;
2490       return;
2491     }
2492     max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0].src_mi->mbmi.tx_size, bsize,
2493                                          pd[1].subsampling_x,
2494                                          pd[1].subsampling_y);
2495     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2496                             &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
2497   }
2498
2499   if (y_skip && uv_skip) {
2500     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2501                   vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2502     *returndist = dist_y + dist_uv;
2503     vp9_zero(ctx->tx_rd_diff);
2504   } else {
2505     int i;
2506     *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2507     *returndist = dist_y + dist_uv;
2508     if (cpi->sf.tx_size_search_method == USE_FULL_RD)
2509       for (i = 0; i < TX_MODES; i++) {
2510         if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
2511           ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
2512         else
2513           ctx->tx_rd_diff[i] = 0;
2514       }
2515   }
2516
2517   ctx->mic = *xd->mi[0].src_mi;
2518 }
2519
2520 // Updating rd_thresh_freq_fact[] here means that the different
2521 // partition/block sizes are handled independently based on the best
2522 // choice for the current partition. It may well be better to keep a scaled
2523 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
2524 // combination that wins out.
2525 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
2526                                   int best_mode_index) {
2527   if (cpi->sf.adaptive_rd_thresh > 0) {
2528     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
2529     int mode;
2530     for (mode = 0; mode < top_mode; ++mode) {
2531       int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
2532
2533       if (mode == best_mode_index) {
2534         *fact -= (*fact >> 3);
2535       } else {
2536         *fact = MIN(*fact + RD_THRESH_INC,
2537                     cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
2538       }
2539     }
2540   }
2541 }
2542
2543 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2544                                   const TileInfo *const tile,
2545                                   int mi_row, int mi_col,
2546                                   int *returnrate,
2547                                   int64_t *returndistortion,
2548                                   BLOCK_SIZE bsize,
2549                                   PICK_MODE_CONTEXT *ctx,
2550                                   int64_t best_rd_so_far) {
2551   VP9_COMMON *const cm = &cpi->common;
2552   RD_OPT *const rd_opt = &cpi->rd;
2553   MACROBLOCKD *const xd = &x->e_mbd;
2554   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
2555   const struct segmentation *const seg = &cm->seg;
2556   struct macroblockd_plane *const pd = xd->plane;
2557   PREDICTION_MODE this_mode;
2558   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
2559   unsigned char segment_id = mbmi->segment_id;
2560   int comp_pred, i, k;
2561   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2562   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2563   int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
2564   INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
2565   int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
2566   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
2567                                     VP9_ALT_FLAG };
2568   int64_t best_rd = best_rd_so_far;
2569   int64_t best_tx_rd[TX_MODES];
2570   int64_t best_tx_diff[TX_MODES];
2571   int64_t best_pred_diff[REFERENCE_MODES];
2572   int64_t best_pred_rd[REFERENCE_MODES];
2573   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
2574   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
2575   MB_MODE_INFO best_mbmode;
2576   int best_mode_skippable = 0;
2577   int midx, best_mode_index = -1;
2578   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
2579   vp9_prob comp_mode_p;
2580   int64_t best_intra_rd = INT64_MAX;
2581   unsigned int best_pred_sse = UINT_MAX;
2582   PREDICTION_MODE best_intra_mode = DC_PRED;
2583   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
2584   int64_t dist_uv[TX_SIZES];
2585   int skip_uv[TX_SIZES];
2586   PREDICTION_MODE mode_uv[TX_SIZES];
2587   const int intra_cost_penalty =
2588       20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
2589   int best_skip2 = 0;
2590   uint8_t ref_frame_skip_mask[2] = { 0 };
2591   uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
2592   int mode_skip_start = cpi->sf.mode_skip_start + 1;
2593   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
2594   const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
2595   int mode_threshold[MAX_MODES];
2596   int *mode_map = rd_opt->mode_map[bsize];
2597   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
2598   vp9_zero(best_mbmode);
2599
2600   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
2601
2602   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
2603                            &comp_mode_p);
2604
2605   for (i = 0; i < REFERENCE_MODES; ++i)
2606     best_pred_rd[i] = INT64_MAX;
2607   for (i = 0; i < TX_MODES; i++)
2608     best_tx_rd[i] = INT64_MAX;
2609   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2610     best_filter_rd[i] = INT64_MAX;
2611   for (i = 0; i < TX_SIZES; i++)
2612     rate_uv_intra[i] = INT_MAX;
2613   for (i = 0; i < MAX_REF_FRAMES; ++i)
2614     x->pred_sse[i] = INT_MAX;
2615   for (i = 0; i < MB_MODE_COUNT; ++i) {
2616     for (k = 0; k < MAX_REF_FRAMES; ++k) {
2617       single_inter_filter[i][k] = SWITCHABLE;
2618       single_skippable[i][k] = 0;
2619     }
2620   }
2621
2622   *returnrate = INT_MAX;
2623
2624   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2625     x->pred_mv_sad[ref_frame] = INT_MAX;
2626     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2627       setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col,
2628                          frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
2629     }
2630     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2631     frame_mv[ZEROMV][ref_frame].as_int = 0;
2632   }
2633
2634   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2635     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
2636       // Skip checking missing references in both single and compound reference
2637       // modes. Note that a mode will be skipped iff both reference frames
2638       // are masked out.
2639       ref_frame_skip_mask[0] |= (1 << ref_frame);
2640       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2641     } else if (cpi->sf.reference_masking) {
2642       for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
2643         // Skip fixed mv modes for poor references
2644         if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
2645           mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
2646           break;
2647         }
2648       }
2649     }
2650     // If the segment reference frame feature is enabled....
2651     // then do nothing if the current ref frame is not allowed..
2652     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
2653         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
2654       ref_frame_skip_mask[0] |= (1 << ref_frame);
2655       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2656     }
2657   }
2658
2659   // Disable this drop out case if the ref frame
2660   // segment level feature is enabled for this segment. This is to
2661   // prevent the possibility that we end up unable to pick any mode.
2662   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
2663     // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
2664     // unless ARNR filtering is enabled in which case we want
2665     // an unfiltered alternative. We allow near/nearest as well
2666     // because they may result in zero-zero MVs but be cheaper.
2667     if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
2668       ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
2669       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2670       mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
2671       if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
2672         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
2673       if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
2674         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
2675     }
2676   }
2677
2678   if (cpi->rc.is_src_frame_alt_ref) {
2679     if (cpi->sf.alt_ref_search_fp) {
2680       mode_skip_mask[ALTREF_FRAME] = 0;
2681       ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
2682       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2683     }
2684   }
2685
2686   if (bsize > cpi->sf.max_intra_bsize) {
2687     ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
2688     ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
2689   }
2690
2691   mode_skip_mask[INTRA_FRAME] |=
2692       ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
2693
2694   for (i = 0; i < MAX_MODES; ++i)
2695     mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
2696
2697   midx =  cpi->sf.schedule_mode_search ? mode_skip_start : 0;
2698   while (midx > 4) {
2699     uint8_t end_pos = 0;
2700     for (i = 5; i < midx; ++i) {
2701       if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
2702         uint8_t tmp = mode_map[i];
2703         mode_map[i] = mode_map[i - 1];
2704         mode_map[i - 1] = tmp;
2705         end_pos = i;
2706       }
2707     }
2708     midx = end_pos;
2709   }
2710
2711   for (midx = 0; midx < MAX_MODES; ++midx) {
2712     int mode_index = mode_map[midx];
2713     int mode_excluded = 0;
2714     int64_t this_rd = INT64_MAX;
2715     int disable_skip = 0;
2716     int compmode_cost = 0;
2717     int rate2 = 0, rate_y = 0, rate_uv = 0;
2718     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
2719     int skippable = 0;
2720     int64_t tx_cache[TX_MODES];
2721     int this_skip2 = 0;
2722     int64_t total_sse = INT64_MAX;
2723     int early_term = 0;
2724
2725     this_mode = vp9_mode_order[mode_index].mode;
2726     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
2727     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
2728
2729     // Look at the reference frame of the best mode so far and set the
2730     // skip mask to look at a subset of the remaining modes.
2731     if (midx == mode_skip_start && best_mode_index >= 0) {
2732       switch (best_mbmode.ref_frame[0]) {
2733         case INTRA_FRAME:
2734           break;
2735         case LAST_FRAME:
2736           ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
2737           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2738           break;
2739         case GOLDEN_FRAME:
2740           ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
2741           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2742           break;
2743         case ALTREF_FRAME:
2744           ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
2745           break;
2746         case NONE:
2747         case MAX_REF_FRAMES:
2748           assert(0 && "Invalid Reference frame");
2749           break;
2750       }
2751     }
2752
2753     if (ref_frame_skip_mask[0] & (1 << ref_frame) &&
2754         ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))
2755       continue;
2756
2757     if (mode_skip_mask[ref_frame] & (1 << this_mode))
2758       continue;
2759
2760     // Test best rd so far against threshold for trying this mode.
2761     if (best_mode_skippable && cpi->sf.schedule_mode_search)
2762       mode_threshold[mode_index] <<= 1;
2763
2764     if (best_rd < mode_threshold[mode_index])
2765       continue;
2766
2767     if (cpi->sf.motion_field_mode_search) {
2768       const int mi_width  = MIN(num_8x8_blocks_wide_lookup[bsize],
2769                                 tile->mi_col_end - mi_col);
2770       const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
2771                                 tile->mi_row_end - mi_row);
2772       const int bsl = mi_width_log2(bsize);
2773       int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
2774           + get_chessboard_index(cm->current_video_frame)) & 0x1;
2775       MB_MODE_INFO *ref_mbmi;
2776       int const_motion = 1;
2777       int skip_ref_frame = !cb_partition_search_ctrl;
2778       MV_REFERENCE_FRAME rf = NONE;
2779       int_mv ref_mv;
2780       ref_mv.as_int = INVALID_MV;
2781
2782       if ((mi_row - 1) >= tile->mi_row_start) {
2783         ref_mv = xd->mi[-xd->mi_stride].src_mi->mbmi.mv[0];
2784         rf = xd->mi[-xd->mi_stride].src_mi->mbmi.ref_frame[0];
2785         for (i = 0; i < mi_width; ++i) {
2786           ref_mbmi = &xd->mi[-xd->mi_stride + i].src_mi->mbmi;
2787           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
2788                           (ref_frame == ref_mbmi->ref_frame[0]);
2789           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
2790         }
2791       }
2792
2793       if ((mi_col - 1) >= tile->mi_col_start) {
2794         if (ref_mv.as_int == INVALID_MV)
2795           ref_mv = xd->mi[-1].src_mi->mbmi.mv[0];
2796         if (rf == NONE)
2797           rf = xd->mi[-1].src_mi->mbmi.ref_frame[0];
2798         for (i = 0; i < mi_height; ++i) {
2799           ref_mbmi = &xd->mi[i * xd->mi_stride - 1].src_mi->mbmi;
2800           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
2801                           (ref_frame == ref_mbmi->ref_frame[0]);
2802           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
2803         }
2804       }
2805
2806       if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
2807         if (rf > INTRA_FRAME)
2808           if (ref_frame != rf)
2809             continue;
2810
2811       if (const_motion)
2812         if (this_mode == NEARMV || this_mode == ZEROMV)
2813           continue;
2814     }
2815
2816     comp_pred = second_ref_frame > INTRA_FRAME;
2817     if (comp_pred) {
2818       if (!cm->allow_comp_inter_inter)
2819         continue;
2820
2821       // Skip compound inter modes if ARF is not available.
2822       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
2823         continue;
2824
2825       // Do not allow compound prediction if the segment level reference frame
2826       // feature is in use as in this case there can only be one reference.
2827       if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
2828         continue;
2829
2830       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
2831           best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
2832         continue;
2833
2834       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
2835     } else {
2836       if (ref_frame != INTRA_FRAME)
2837         mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
2838     }
2839
2840     if (ref_frame == INTRA_FRAME) {
2841       if (cpi->sf.adaptive_mode_search)
2842         if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
2843           continue;
2844
2845       if (this_mode != DC_PRED) {
2846         // Disable intra modes other than DC_PRED for blocks with low variance
2847         // Threshold for intra skipping based on source variance
2848         // TODO(debargha): Specialize the threshold for super block sizes
2849         const unsigned int skip_intra_var_thresh = 64;
2850         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
2851             x->source_variance < skip_intra_var_thresh)
2852           continue;
2853         // Only search the oblique modes if the best so far is
2854         // one of the neighboring directional modes
2855         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
2856             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
2857           if (best_mode_index >= 0 &&
2858               best_mbmode.ref_frame[0] > INTRA_FRAME)
2859             continue;
2860         }
2861         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2862           if (conditional_skipintra(this_mode, best_intra_mode))
2863               continue;
2864         }
2865       }
2866     } else {
2867       const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
2868       if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
2869                               this_mode, ref_frames))
2870         continue;
2871     }
2872
2873     mbmi->mode = this_mode;
2874     mbmi->uv_mode = DC_PRED;
2875     mbmi->ref_frame[0] = ref_frame;
2876     mbmi->ref_frame[1] = second_ref_frame;
2877     // Evaluate all sub-pel filters irrespective of whether we can use
2878     // them for this frame.
2879     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
2880                                                           : cm->interp_filter;
2881     mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
2882
2883     x->skip = 0;
2884     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
2885
2886     // Select prediction reference frames.
2887     for (i = 0; i < MAX_MB_PLANE; i++) {
2888       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2889       if (comp_pred)
2890         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
2891     }
2892
2893     for (i = 0; i < TX_MODES; ++i)
2894       tx_cache[i] = INT64_MAX;
2895
2896     if (ref_frame == INTRA_FRAME) {
2897       TX_SIZE uv_tx;
2898       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2899                       NULL, bsize, tx_cache, best_rd);
2900
2901       if (rate_y == INT_MAX)
2902         continue;
2903
2904       uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,
2905                                   pd[1].subsampling_y);
2906       if (rate_uv_intra[uv_tx] == INT_MAX) {
2907         choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
2908                              &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
2909                              &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
2910       }
2911
2912       rate_uv = rate_uv_tokenonly[uv_tx];
2913       distortion_uv = dist_uv[uv_tx];
2914       skippable = skippable && skip_uv[uv_tx];
2915       mbmi->uv_mode = mode_uv[uv_tx];
2916
2917       rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
2918       if (this_mode != DC_PRED && this_mode != TM_PRED)
2919         rate2 += intra_cost_penalty;
2920       distortion2 = distortion_y + distortion_uv;
2921     } else {
2922       this_rd = handle_inter_mode(cpi, x, bsize,
2923                                   tx_cache,
2924                                   &rate2, &distortion2, &skippable,
2925                                   &rate_y, &distortion_y,
2926                                   &rate_uv, &distortion_uv,
2927                                   &disable_skip, frame_mv,
2928                                   mi_row, mi_col,
2929                                   single_newmv, single_inter_filter,
2930                                   single_skippable, &total_sse, best_rd);
2931       if (this_rd == INT64_MAX)
2932         continue;
2933
2934       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
2935
2936       if (cm->reference_mode == REFERENCE_MODE_SELECT)
2937         rate2 += compmode_cost;
2938     }
2939
2940     // Estimate the reference frame signaling cost and add it
2941     // to the rolling cost variable.
2942     if (comp_pred) {
2943       rate2 += ref_costs_comp[ref_frame];
2944     } else {
2945       rate2 += ref_costs_single[ref_frame];
2946     }
2947
2948     if (!disable_skip) {
2949       if (skippable) {
2950         // Back out the coefficient coding costs
2951         rate2 -= (rate_y + rate_uv);
2952
2953         // Cost the skip mb case
2954         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2955       } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
2956         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
2957             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
2958           // Add in the cost of the no skip flag.
2959           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2960         } else {
2961           // FIXME(rbultje) make this work for splitmv also
2962           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2963           distortion2 = total_sse;
2964           assert(total_sse >= 0);
2965           rate2 -= (rate_y + rate_uv);
2966           this_skip2 = 1;
2967         }
2968       } else {
2969         // Add in the cost of the no skip flag.
2970         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2971       }
2972
2973       // Calculate the final RD estimate for this mode.
2974       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2975     }
2976
2977     if (ref_frame == INTRA_FRAME) {
2978     // Keep record of best intra rd
2979       if (this_rd < best_intra_rd) {
2980         best_intra_rd = this_rd;
2981         best_intra_mode = mbmi->mode;
2982       }
2983     }
2984
2985     if (!disable_skip && ref_frame == INTRA_FRAME) {
2986       for (i = 0; i < REFERENCE_MODES; ++i)
2987         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
2988       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2989         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
2990     }
2991
2992     // Did this mode help.. i.e. is it the new best mode
2993     if (this_rd < best_rd || x->skip) {
2994       int max_plane = MAX_MB_PLANE;
2995       if (!mode_excluded) {
2996         // Note index of best mode so far
2997         best_mode_index = mode_index;
2998
2999         if (ref_frame == INTRA_FRAME) {
3000           /* required for left and above block mv */
3001           mbmi->mv[0].as_int = 0;
3002           max_plane = 1;
3003         } else {
3004           best_pred_sse = x->pred_sse[ref_frame];
3005         }
3006
3007         *returnrate = rate2;
3008         *returndistortion = distortion2;
3009         best_rd = this_rd;
3010         best_mbmode = *mbmi;
3011         best_skip2 = this_skip2;
3012         best_mode_skippable = skippable;
3013
3014         if (!x->select_tx_size)
3015           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3016         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3017                    sizeof(uint8_t) * ctx->num_4x4_blk);
3018
3019         // TODO(debargha): enhance this test with a better distortion prediction
3020         // based on qp, activity mask and history
3021         if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3022             (mode_index > MIN_EARLY_TERM_INDEX)) {
3023           const int qstep = xd->plane[0].dequant[1];
3024           // TODO(debargha): Enhance this by specializing for each mode_index
3025           int scale = 4;
3026           if (x->source_variance < UINT_MAX) {
3027             const int var_adjust = (x->source_variance < 16);
3028             scale -= var_adjust;
3029           }
3030           if (ref_frame > INTRA_FRAME &&
3031               distortion2 * scale < qstep * qstep) {
3032             early_term = 1;
3033           }
3034         }
3035       }
3036     }
3037
3038     /* keep record of best compound/single-only prediction */
3039     if (!disable_skip && ref_frame != INTRA_FRAME) {
3040       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3041
3042       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3043         single_rate = rate2 - compmode_cost;
3044         hybrid_rate = rate2;
3045       } else {
3046         single_rate = rate2;
3047         hybrid_rate = rate2 + compmode_cost;
3048       }
3049
3050       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3051       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3052
3053       if (!comp_pred) {
3054         if (single_rd < best_pred_rd[SINGLE_REFERENCE])
3055           best_pred_rd[SINGLE_REFERENCE] = single_rd;
3056       } else {
3057         if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
3058           best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3059       }
3060       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3061         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3062
3063       /* keep record of best filter type */
3064       if (!mode_excluded && cm->interp_filter != BILINEAR) {
3065         int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3066                               SWITCHABLE_FILTERS : cm->interp_filter];
3067
3068         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3069           int64_t adj_rd;
3070           if (ref == INT64_MAX)
3071             adj_rd = 0;
3072           else if (rd_opt->filter_cache[i] == INT64_MAX)
3073             // when early termination is triggered, the encoder does not have
3074             // access to the rate-distortion cost. it only knows that the cost
3075             // should be above the maximum valid value. hence it takes the known
3076             // maximum plus an arbitrary constant as the rate-distortion cost.
3077             adj_rd = rd_opt->mask_filter - ref + 10;
3078           else
3079             adj_rd = rd_opt->filter_cache[i] - ref;
3080
3081           adj_rd += this_rd;
3082           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3083         }
3084       }
3085     }
3086
3087     /* keep record of best txfm size */
3088     if (bsize < BLOCK_32X32) {
3089       if (bsize < BLOCK_16X16)
3090         tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3091
3092       tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3093     }
3094     if (!mode_excluded && this_rd != INT64_MAX) {
3095       for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3096         int64_t adj_rd = INT64_MAX;
3097         adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3098
3099         if (adj_rd < best_tx_rd[i])
3100           best_tx_rd[i] = adj_rd;
3101       }
3102     }
3103
3104     if (early_term)
3105       break;
3106
3107     if (x->skip && !comp_pred)
3108       break;
3109   }
3110
3111   // The inter modes' rate costs are not calculated precisely in some cases.
3112   // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
3113   // ZEROMV. Here, checks are added for those cases, and the mode decisions
3114   // are corrected.
3115   if (best_mbmode.mode == NEWMV) {
3116     const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
3117         best_mbmode.ref_frame[1]};
3118     int comp_pred_mode = refs[1] > INTRA_FRAME;
3119
3120     if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3121         ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
3122             best_mbmode.mv[1].as_int) || !comp_pred_mode))
3123       best_mbmode.mode = NEARESTMV;
3124     else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3125         ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
3126             best_mbmode.mv[1].as_int) || !comp_pred_mode))
3127       best_mbmode.mode = NEARMV;
3128     else if (best_mbmode.mv[0].as_int == 0 &&
3129         ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
3130       best_mbmode.mode = ZEROMV;
3131   }
3132
3133   if (best_mode_index < 0 || best_rd >= best_rd_so_far)
3134     return INT64_MAX;
3135
3136   // If we used an estimate for the uv intra rd in the loop above...
3137   if (cpi->sf.use_uv_intra_rd_estimate) {
3138     // Do Intra UV best rd mode selection if best mode choice above was intra.
3139     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3140       TX_SIZE uv_tx_size;
3141       *mbmi = best_mbmode;
3142       uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3143       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3144                               &rate_uv_tokenonly[uv_tx_size],
3145                               &dist_uv[uv_tx_size],
3146                               &skip_uv[uv_tx_size],
3147                               bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3148                               uv_tx_size);
3149     }
3150   }
3151
3152   assert((cm->interp_filter == SWITCHABLE) ||
3153          (cm->interp_filter == best_mbmode.interp_filter) ||
3154          !is_inter_block(&best_mbmode));
3155
3156   if (!cpi->rc.is_src_frame_alt_ref)
3157     update_rd_thresh_fact(cpi, bsize, best_mode_index);
3158
3159   // macroblock modes
3160   *mbmi = best_mbmode;
3161   x->skip |= best_skip2;
3162
3163   for (i = 0; i < REFERENCE_MODES; ++i) {
3164     if (best_pred_rd[i] == INT64_MAX)
3165       best_pred_diff[i] = INT_MIN;
3166     else
3167       best_pred_diff[i] = best_rd - best_pred_rd[i];
3168   }
3169
3170   if (!x->skip) {
3171     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3172       if (best_filter_rd[i] == INT64_MAX)
3173         best_filter_diff[i] = 0;
3174       else
3175         best_filter_diff[i] = best_rd - best_filter_rd[i];
3176     }
3177     if (cm->interp_filter == SWITCHABLE)
3178       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3179     for (i = 0; i < TX_MODES; i++) {
3180       if (best_tx_rd[i] == INT64_MAX)
3181         best_tx_diff[i] = 0;
3182       else
3183         best_tx_diff[i] = best_rd - best_tx_rd[i];
3184     }
3185   } else {
3186     vp9_zero(best_filter_diff);
3187     vp9_zero(best_tx_diff);
3188   }
3189
3190   // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
3191   // updating code causes PSNR loss. Need to figure out the confliction.
3192   x->skip |= best_mode_skippable;
3193
3194   store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3195                        best_tx_diff, best_filter_diff, best_mode_skippable);
3196
3197   return best_rd;
3198 }
3199
3200 int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
3201                                            int *returnrate,
3202                                            int64_t *returndistortion,
3203                                            BLOCK_SIZE bsize,
3204                                            PICK_MODE_CONTEXT *ctx,
3205                                            int64_t best_rd_so_far) {
3206   VP9_COMMON *const cm = &cpi->common;
3207   RD_OPT *const rd_opt = &cpi->rd;
3208   MACROBLOCKD *const xd = &x->e_mbd;
3209   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
3210   unsigned char segment_id = mbmi->segment_id;
3211   const int comp_pred = 0;
3212   int i;
3213   int64_t best_tx_diff[TX_MODES];
3214   int64_t best_pred_diff[REFERENCE_MODES];
3215   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3216   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3217   vp9_prob comp_mode_p;
3218   INTERP_FILTER best_filter = SWITCHABLE;
3219   int64_t this_rd = INT64_MAX;
3220   int rate2 = 0;
3221   const int64_t distortion2 = 0;
3222
3223   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3224
3225   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3226                            &comp_mode_p);
3227
3228   for (i = 0; i < MAX_REF_FRAMES; ++i)
3229     x->pred_sse[i] = INT_MAX;
3230   for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
3231     x->pred_mv_sad[i] = INT_MAX;
3232
3233   *returnrate = INT_MAX;
3234
3235   assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
3236
3237   mbmi->mode = ZEROMV;
3238   mbmi->uv_mode = DC_PRED;
3239   mbmi->ref_frame[0] = LAST_FRAME;
3240   mbmi->ref_frame[1] = NONE;
3241   mbmi->mv[0].as_int = 0;
3242   x->skip = 1;
3243
3244   // Search for best switchable filter by checking the variance of
3245   // pred error irrespective of whether the filter will be used
3246   rd_opt->mask_filter = 0;
3247   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3248     rd_opt->filter_cache[i] = INT64_MAX;
3249
3250   if (cm->interp_filter != BILINEAR) {
3251     best_filter = EIGHTTAP;
3252     if (cm->interp_filter == SWITCHABLE &&
3253         x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
3254       int rs;
3255       int best_rs = INT_MAX;
3256       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
3257         mbmi->interp_filter = i;
3258         rs = vp9_get_switchable_rate(cpi);
3259         if (rs < best_rs) {
3260           best_rs = rs;
3261           best_filter = mbmi->interp_filter;
3262         }
3263       }
3264     }
3265   }
3266   // Set the appropriate filter
3267   if (cm->interp_filter == SWITCHABLE) {
3268     mbmi->interp_filter = best_filter;
3269     rate2 += vp9_get_switchable_rate(cpi);
3270   } else {
3271     mbmi->interp_filter = cm->interp_filter;
3272   }
3273
3274   if (cm->reference_mode == REFERENCE_MODE_SELECT)
3275     rate2 += vp9_cost_bit(comp_mode_p, comp_pred);
3276
3277   // Estimate the reference frame signaling cost and add it
3278   // to the rolling cost variable.
3279   rate2 += ref_costs_single[LAST_FRAME];
3280   this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3281
3282   *returnrate = rate2;
3283   *returndistortion = distortion2;
3284
3285   if (this_rd >= best_rd_so_far)
3286     return INT64_MAX;
3287
3288   assert((cm->interp_filter == SWITCHABLE) ||
3289          (cm->interp_filter == mbmi->interp_filter));
3290
3291   update_rd_thresh_fact(cpi, bsize, THR_ZEROMV);
3292
3293   vp9_zero(best_pred_diff);
3294   vp9_zero(best_filter_diff);
3295   vp9_zero(best_tx_diff);
3296
3297   if (!x->select_tx_size)
3298     swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
3299   store_coding_context(x, ctx, THR_ZEROMV,
3300                        best_pred_diff, best_tx_diff, best_filter_diff, 0);
3301
3302   return this_rd;
3303 }
3304
3305 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3306                                       const TileInfo *const tile,
3307                                       int mi_row, int mi_col,
3308                                       int *returnrate,
3309                                       int64_t *returndistortion,
3310                                       BLOCK_SIZE bsize,
3311                                       PICK_MODE_CONTEXT *ctx,
3312                                       int64_t best_rd_so_far) {
3313   VP9_COMMON *const cm = &cpi->common;
3314   RD_OPT *const rd_opt = &cpi->rd;
3315   MACROBLOCKD *const xd = &x->e_mbd;
3316   MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
3317   const struct segmentation *const seg = &cm->seg;
3318   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3319   unsigned char segment_id = mbmi->segment_id;
3320   int comp_pred, i;
3321   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3322   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3323   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3324                                     VP9_ALT_FLAG };
3325   int64_t best_rd = best_rd_so_far;
3326   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
3327   static const int64_t best_tx_diff[TX_MODES] = { 0 };
3328   int64_t best_pred_diff[REFERENCE_MODES];
3329   int64_t best_pred_rd[REFERENCE_MODES];
3330   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3331   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3332   MB_MODE_INFO best_mbmode;
3333   int ref_index, best_ref_index = 0;
3334   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3335   vp9_prob comp_mode_p;
3336   INTERP_FILTER tmp_best_filter = SWITCHABLE;
3337   int rate_uv_intra, rate_uv_tokenonly;
3338   int64_t dist_uv;
3339   int skip_uv;
3340   PREDICTION_MODE mode_uv = DC_PRED;
3341   const int intra_cost_penalty =
3342       20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3343   int_mv seg_mvs[4][MAX_REF_FRAMES];
3344   b_mode_info best_bmodes[4];
3345   int best_skip2 = 0;
3346   int ref_frame_skip_mask[2] = { 0 };
3347
3348   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3349   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3350   vp9_zero(best_mbmode);
3351
3352   for (i = 0; i < 4; i++) {
3353     int j;
3354     for (j = 0; j < MAX_REF_FRAMES; j++)
3355       seg_mvs[i][j].as_int = INVALID_MV;
3356   }
3357
3358   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3359                            &comp_mode_p);
3360
3361   for (i = 0; i < REFERENCE_MODES; ++i)
3362     best_pred_rd[i] = INT64_MAX;
3363   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3364     best_filter_rd[i] = INT64_MAX;
3365   rate_uv_intra = INT_MAX;
3366
3367   *returnrate = INT_MAX;
3368
3369   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3370     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3371       setup_buffer_inter(cpi, x, tile,
3372                              ref_frame, bsize, mi_row, mi_col,
3373                              frame_mv[NEARESTMV], frame_mv[NEARMV],
3374                              yv12_mb);
3375     } else {
3376       ref_frame_skip_mask[0] |= (1 << ref_frame);
3377       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3378     }
3379     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3380     frame_mv[ZEROMV][ref_frame].as_int = 0;
3381   }
3382
3383   for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3384     int mode_excluded = 0;
3385     int64_t this_rd = INT64_MAX;
3386     int disable_skip = 0;
3387     int compmode_cost = 0;
3388     int rate2 = 0, rate_y = 0, rate_uv = 0;
3389     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3390     int skippable = 0;
3391     int i;
3392     int this_skip2 = 0;
3393     int64_t total_sse = INT_MAX;
3394     int early_term = 0;
3395
3396     ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3397     second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3398
3399     // Look at the reference frame of the best mode so far and set the
3400     // skip mask to look at a subset of the remaining modes.
3401     if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3402       if (ref_index == 3) {
3403         switch (best_mbmode.ref_frame[0]) {
3404           case INTRA_FRAME:
3405             break;
3406           case LAST_FRAME:
3407             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
3408             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3409             break;
3410           case GOLDEN_FRAME:
3411             ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
3412             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3413             break;
3414           case ALTREF_FRAME:
3415             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
3416             break;
3417           case NONE:
3418           case MAX_REF_FRAMES:
3419             assert(0 && "Invalid Reference frame");
3420             break;
3421         }
3422       }
3423     }
3424
3425     if (ref_frame_skip_mask[0] & (1 << ref_frame) &&
3426         ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))
3427       continue;
3428
3429     // Test best rd so far against threshold for trying this mode.
3430     if (rd_less_than_thresh(best_rd,
3431                             rd_opt->threshes[segment_id][bsize][ref_index],
3432                             rd_opt->thresh_freq_fact[bsize][ref_index]))
3433       continue;
3434
3435     comp_pred = second_ref_frame > INTRA_FRAME;
3436     if (comp_pred) {
3437       if (!cm->allow_comp_inter_inter)
3438         continue;
3439       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3440         continue;
3441       // Do not allow compound prediction if the segment level reference frame
3442       // feature is in use as in this case there can only be one reference.
3443       if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3444         continue;
3445
3446       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3447           best_mbmode.ref_frame[0] == INTRA_FRAME)
3448         continue;
3449     }
3450
3451     // TODO(jingning, jkoleszar): scaling reference frame not supported for
3452     // sub8x8 blocks.
3453     if (ref_frame > INTRA_FRAME &&
3454         vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3455       continue;
3456
3457     if (second_ref_frame > INTRA_FRAME &&
3458         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3459       continue;
3460
3461     if (comp_pred)
3462       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3463     else if (ref_frame != INTRA_FRAME)
3464       mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3465
3466     // If the segment reference frame feature is enabled....
3467     // then do nothing if the current ref frame is not allowed..
3468     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3469         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3470       continue;
3471     // Disable this drop out case if the ref frame
3472     // segment level feature is enabled for this segment. This is to
3473     // prevent the possibility that we end up unable to pick any mode.
3474     } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3475       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3476       // unless ARNR filtering is enabled in which case we want
3477       // an unfiltered alternative. We allow near/nearest as well
3478       // because they may result in zero-zero MVs but be cheaper.
3479       if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3480         continue;
3481     }
3482
3483     mbmi->tx_size = TX_4X4;
3484     mbmi->uv_mode = DC_PRED;
3485     mbmi->ref_frame[0] = ref_frame;
3486     mbmi->ref_frame[1] = second_ref_frame;
3487     // Evaluate all sub-pel filters irrespective of whether we can use
3488     // them for this frame.
3489     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3490                                                           : cm->interp_filter;
3491     x->skip = 0;
3492     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3493
3494     // Select prediction reference frames.
3495     for (i = 0; i < MAX_MB_PLANE; i++) {
3496       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3497       if (comp_pred)
3498         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3499     }
3500
3501     if (ref_frame == INTRA_FRAME) {
3502       int rate;
3503       if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3504                                        &distortion_y, best_rd) >= best_rd)
3505         continue;
3506       rate2 += rate;
3507       rate2 += intra_cost_penalty;
3508       distortion2 += distortion_y;
3509
3510       if (rate_uv_intra == INT_MAX) {
3511         choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3512                              &rate_uv_intra,
3513                              &rate_uv_tokenonly,
3514                              &dist_uv, &skip_uv,
3515                              &mode_uv);
3516       }
3517       rate2 += rate_uv_intra;
3518       rate_uv = rate_uv_tokenonly;
3519       distortion2 += dist_uv;
3520       distortion_uv = dist_uv;
3521       mbmi->uv_mode = mode_uv;
3522     } else {
3523       int rate;
3524       int64_t distortion;
3525       int64_t this_rd_thresh;
3526       int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3527       int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3528       int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3529       int tmp_best_skippable = 0;
3530       int switchable_filter_index;
3531       int_mv *second_ref = comp_pred ?
3532                              &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3533       b_mode_info tmp_best_bmodes[16];
3534       MB_MODE_INFO tmp_best_mbmode;
3535       BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3536       int pred_exists = 0;
3537       int uv_skippable;
3538
3539       this_rd_thresh = (ref_frame == LAST_FRAME) ?
3540           rd_opt->threshes[segment_id][bsize][THR_LAST] :
3541           rd_opt->threshes[segment_id][bsize][THR_ALTR];
3542       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3543       rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
3544       rd_opt->mask_filter = 0;
3545       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3546         rd_opt->filter_cache[i] = INT64_MAX;
3547
3548       if (cm->interp_filter != BILINEAR) {
3549         tmp_best_filter = EIGHTTAP;
3550         if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
3551           tmp_best_filter = EIGHTTAP;
3552         } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
3553                    ctx->pred_interp_filter < SWITCHABLE) {
3554           tmp_best_filter = ctx->pred_interp_filter;
3555         } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
3556           tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
3557                               ctx->pred_interp_filter : 0;
3558         } else {
3559           for (switchable_filter_index = 0;
3560                switchable_filter_index < SWITCHABLE_FILTERS;
3561                ++switchable_filter_index) {
3562             int newbest, rs;
3563             int64_t rs_rd;
3564             mbmi->interp_filter = switchable_filter_index;
3565             tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3566                                               &mbmi->ref_mvs[ref_frame][0],
3567                                               second_ref, best_yrd, &rate,
3568                                               &rate_y, &distortion,
3569                                               &skippable, &total_sse,
3570                                               (int) this_rd_thresh, seg_mvs,
3571                                               bsi, switchable_filter_index,
3572                                               mi_row, mi_col);
3573
3574             if (tmp_rd == INT64_MAX)
3575               continue;
3576             rs = vp9_get_switchable_rate(cpi);
3577             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3578             rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
3579             rd_opt->filter_cache[SWITCHABLE_FILTERS] =
3580                 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
3581                     tmp_rd + rs_rd);
3582             if (cm->interp_filter == SWITCHABLE)
3583               tmp_rd += rs_rd;
3584
3585             rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
3586
3587             newbest = (tmp_rd < tmp_best_rd);
3588             if (newbest) {
3589               tmp_best_filter = mbmi->interp_filter;
3590               tmp_best_rd = tmp_rd;
3591             }
3592             if ((newbest && cm->interp_filter == SWITCHABLE) ||
3593                 (mbmi->interp_filter == cm->interp_filter &&
3594                  cm->interp_filter != SWITCHABLE)) {
3595               tmp_best_rdu = tmp_rd;
3596               tmp_best_rate = rate;
3597               tmp_best_ratey = rate_y;
3598               tmp_best_distortion = distortion;
3599               tmp_best_sse = total_sse;
3600               tmp_best_skippable = skippable;
3601               tmp_best_mbmode = *mbmi;
3602               for (i = 0; i < 4; i++) {
3603                 tmp_best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
3604                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
3605               }
3606               pred_exists = 1;
3607               if (switchable_filter_index == 0 &&
3608                   cpi->sf.use_rd_breakout &&
3609                   best_rd < INT64_MAX) {
3610                 if (tmp_best_rdu / 2 > best_rd) {
3611                   // skip searching the other filters if the first is
3612                   // already substantially larger than the best so far
3613                   tmp_best_filter = mbmi->interp_filter;
3614                   tmp_best_rdu = INT64_MAX;
3615                   break;
3616                 }
3617               }
3618             }
3619           }  // switchable_filter_index loop
3620         }
3621       }
3622
3623       if (tmp_best_rdu == INT64_MAX && pred_exists)
3624         continue;
3625
3626       mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
3627                              tmp_best_filter : cm->interp_filter);
3628       if (!pred_exists) {
3629         // Handles the special case when a filter that is not in the
3630         // switchable list (bilinear, 6-tap) is indicated at the frame level
3631         tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3632                                           &mbmi->ref_mvs[ref_frame][0],
3633                                           second_ref, best_yrd, &rate, &rate_y,
3634                                           &distortion, &skippable, &total_sse,
3635                                           (int) this_rd_thresh, seg_mvs, bsi, 0,
3636                                           mi_row, mi_col);
3637         if (tmp_rd == INT64_MAX)
3638           continue;
3639       } else {
3640         total_sse = tmp_best_sse;
3641         rate = tmp_best_rate;
3642         rate_y = tmp_best_ratey;
3643         distortion = tmp_best_distortion;
3644         skippable = tmp_best_skippable;
3645         *mbmi = tmp_best_mbmode;
3646         for (i = 0; i < 4; i++)
3647           xd->mi[0].src_mi->bmi[i] = tmp_best_bmodes[i];
3648       }
3649
3650       rate2 += rate;
3651       distortion2 += distortion;
3652
3653       if (cm->interp_filter == SWITCHABLE)
3654         rate2 += vp9_get_switchable_rate(cpi);
3655
3656       if (!mode_excluded)
3657         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
3658                                   : cm->reference_mode == COMPOUND_REFERENCE;
3659
3660       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3661
3662       tmp_best_rdu = best_rd -
3663           MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
3664               RDCOST(x->rdmult, x->rddiv, 0, total_sse));
3665
3666       if (tmp_best_rdu > 0) {
3667         // If even the 'Y' rd value of split is higher than best so far
3668         // then dont bother looking at UV
3669         vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3670                                         BLOCK_8X8);
3671         super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
3672                          &uv_sse, BLOCK_8X8, tmp_best_rdu);
3673         if (rate_uv == INT_MAX)
3674           continue;
3675         rate2 += rate_uv;
3676         distortion2 += distortion_uv;
3677         skippable = skippable && uv_skippable;
3678         total_sse += uv_sse;
3679       }
3680     }
3681
3682     if (cm->reference_mode == REFERENCE_MODE_SELECT)
3683       rate2 += compmode_cost;
3684
3685     // Estimate the reference frame signaling cost and add it
3686     // to the rolling cost variable.
3687     if (second_ref_frame > INTRA_FRAME) {
3688       rate2 += ref_costs_comp[ref_frame];
3689     } else {
3690       rate2 += ref_costs_single[ref_frame];
3691     }
3692
3693     if (!disable_skip) {
3694       // Skip is never coded at the segment level for sub8x8 blocks and instead
3695       // always coded in the bitstream at the mode info level.
3696
3697       if (ref_frame != INTRA_FRAME && !xd->lossless) {
3698         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3699             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3700           // Add in the cost of the no skip flag.
3701           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3702         } else {
3703           // FIXME(rbultje) make this work for splitmv also
3704           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3705           distortion2 = total_sse;
3706           assert(total_sse >= 0);
3707           rate2 -= (rate_y + rate_uv);
3708           rate_y = 0;
3709           rate_uv = 0;
3710           this_skip2 = 1;
3711         }
3712       } else {
3713         // Add in the cost of the no skip flag.
3714         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3715       }
3716
3717       // Calculate the final RD estimate for this mode.
3718       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3719     }
3720
3721     if (!disable_skip && ref_frame == INTRA_FRAME) {
3722       for (i = 0; i < REFERENCE_MODES; ++i)
3723         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3724       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3725         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3726     }
3727
3728     // Did this mode help.. i.e. is it the new best mode
3729     if (this_rd < best_rd || x->skip) {
3730       if (!mode_excluded) {
3731         int max_plane = MAX_MB_PLANE;
3732         // Note index of best mode so far
3733         best_ref_index = ref_index;
3734
3735         if (ref_frame == INTRA_FRAME) {
3736           /* required for left and above block mv */
3737           mbmi->mv[0].as_int = 0;
3738           max_plane = 1;
3739         }
3740
3741         *returnrate = rate2;
3742         *returndistortion = distortion2;
3743         best_rd = this_rd;
3744         best_yrd = best_rd -
3745                    RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
3746         best_mbmode = *mbmi;
3747         best_skip2 = this_skip2;
3748         if (!x->select_tx_size)
3749           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3750         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
3751                    sizeof(uint8_t) * ctx->num_4x4_blk);
3752
3753         for (i = 0; i < 4; i++)
3754           best_bmodes[i] = xd->mi[0].src_mi->bmi[i];
3755
3756         // TODO(debargha): enhance this test with a better distortion prediction
3757         // based on qp, activity mask and history
3758         if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3759             (ref_index > MIN_EARLY_TERM_INDEX)) {
3760           const int qstep = xd->plane[0].dequant[1];
3761           // TODO(debargha): Enhance this by specializing for each mode_index
3762           int scale = 4;
3763           if (x->source_variance < UINT_MAX) {
3764             const int var_adjust = (x->source_variance < 16);
3765             scale -= var_adjust;
3766           }
3767           if (ref_frame > INTRA_FRAME &&
3768               distortion2 * scale < qstep * qstep) {
3769             early_term = 1;
3770           }
3771         }
3772       }
3773     }
3774
3775     /* keep record of best compound/single-only prediction */
3776     if (!disable_skip && ref_frame != INTRA_FRAME) {
3777       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3778
3779       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3780         single_rate = rate2 - compmode_cost;
3781         hybrid_rate = rate2;
3782       } else {
3783         single_rate = rate2;
3784         hybrid_rate = rate2 + compmode_cost;
3785       }
3786
3787       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3788       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3789
3790       if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
3791         best_pred_rd[SINGLE_REFERENCE] = single_rd;
3792       else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
3793         best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3794
3795       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3796         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3797     }
3798
3799     /* keep record of best filter type */
3800     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3801         cm->interp_filter != BILINEAR) {
3802       int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3803                               SWITCHABLE_FILTERS : cm->interp_filter];
3804       int64_t adj_rd;
3805       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3806         if (ref == INT64_MAX)
3807           adj_rd = 0;
3808         else if (rd_opt->filter_cache[i] == INT64_MAX)
3809           // when early termination is triggered, the encoder does not have
3810           // access to the rate-distortion cost. it only knows that the cost
3811           // should be above the maximum valid value. hence it takes the known
3812           // maximum plus an arbitrary constant as the rate-distortion cost.
3813           adj_rd = rd_opt->mask_filter - ref + 10;
3814         else
3815           adj_rd = rd_opt->filter_cache[i] - ref;
3816
3817         adj_rd += this_rd;
3818         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3819       }
3820     }
3821
3822     if (early_term)
3823       break;
3824
3825     if (x->skip && !comp_pred)
3826       break;
3827   }
3828
3829   if (best_rd >= best_rd_so_far)
3830     return INT64_MAX;
3831
3832   // If we used an estimate for the uv intra rd in the loop above...
3833   if (cpi->sf.use_uv_intra_rd_estimate) {
3834     // Do Intra UV best rd mode selection if best mode choice above was intra.
3835     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3836       *mbmi = best_mbmode;
3837       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
3838                               &rate_uv_tokenonly,
3839                               &dist_uv,
3840                               &skip_uv,
3841                               BLOCK_8X8, TX_4X4);
3842     }
3843   }
3844
3845   if (best_rd == INT64_MAX) {
3846     *returnrate = INT_MAX;
3847     *returndistortion = INT64_MAX;
3848     return best_rd;
3849   }
3850
3851   assert((cm->interp_filter == SWITCHABLE) ||
3852          (cm->interp_filter == best_mbmode.interp_filter) ||
3853          !is_inter_block(&best_mbmode));
3854
3855   update_rd_thresh_fact(cpi, bsize, best_ref_index);
3856
3857   // macroblock modes
3858   *mbmi = best_mbmode;
3859   x->skip |= best_skip2;
3860   if (!is_inter_block(&best_mbmode)) {
3861     for (i = 0; i < 4; i++)
3862       xd->mi[0].src_mi->bmi[i].as_mode = best_bmodes[i].as_mode;
3863   } else {
3864     for (i = 0; i < 4; ++i)
3865       vpx_memcpy(&xd->mi[0].src_mi->bmi[i], &best_bmodes[i],
3866                  sizeof(b_mode_info));
3867
3868     mbmi->mv[0].as_int = xd->mi[0].src_mi->bmi[3].as_mv[0].as_int;
3869     mbmi->mv[1].as_int = xd->mi[0].src_mi->bmi[3].as_mv[1].as_int;
3870   }
3871
3872   for (i = 0; i < REFERENCE_MODES; ++i) {
3873     if (best_pred_rd[i] == INT64_MAX)
3874       best_pred_diff[i] = INT_MIN;
3875     else
3876       best_pred_diff[i] = best_rd - best_pred_rd[i];
3877   }
3878
3879   if (!x->skip) {
3880     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3881       if (best_filter_rd[i] == INT64_MAX)
3882         best_filter_diff[i] = 0;
3883       else
3884         best_filter_diff[i] = best_rd - best_filter_rd[i];
3885     }
3886     if (cm->interp_filter == SWITCHABLE)
3887       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3888   } else {
3889     vp9_zero(best_filter_diff);
3890   }
3891
3892   store_coding_context(x, ctx, best_ref_index,
3893                        best_pred_diff, best_tx_diff, best_filter_diff, 0);
3894
3895   return best_rd;
3896 }
3897