vp9/encoder/vp9_rdopt.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include <assert.h>
  12 #include <math.h>
  13
  14 #include "./vp9_rtcd.h"
  15
  16 #include "vpx_mem/vpx_mem.h"
  17
  18 #include "vp9/common/vp9_common.h"
  19 #include "vp9/common/vp9_entropy.h"
  20 #include "vp9/common/vp9_entropymode.h"
  21 #include "vp9/common/vp9_idct.h"
  22 #include "vp9/common/vp9_mvref_common.h"
  23 #include "vp9/common/vp9_pred_common.h"
  24 #include "vp9/common/vp9_quant_common.h"
  25 #include "vp9/common/vp9_reconinter.h"
  26 #include "vp9/common/vp9_reconintra.h"
  27 #include "vp9/common/vp9_seg_common.h"
  28 #include "vp9/common/vp9_systemdependent.h"
  29
  30 #include "vp9/encoder/vp9_cost.h"
  31 #include "vp9/encoder/vp9_encodemb.h"
  32 #include "vp9/encoder/vp9_encodemv.h"
  33 #include "vp9/encoder/vp9_encoder.h"
  34 #include "vp9/encoder/vp9_mcomp.h"
  35 #include "vp9/encoder/vp9_quantize.h"
  36 #include "vp9/encoder/vp9_ratectrl.h"
  37 #include "vp9/encoder/vp9_rd.h"
  38 #include "vp9/encoder/vp9_rdopt.h"
  39 #include "vp9/encoder/vp9_variance.h"
  40
     // Limit and increment for an RD threshold factor. Neither macro is
     // referenced in the part of the file reviewed here; NOTE(review): confirm
     // the exact meaning at the use site.
  41 #define RD_THRESH_MAX_FACT 64
  42 #define RD_THRESH_INC      1
  43
     // Per-reference bitmasks, one bit per MV_REFERENCE_FRAME value. Each mask
     // has the bits of every frame type EXCEPT the one it is named after, i.e.
     // LAST_FRAME_MODE_MASK sets GOLDEN, ALTREF and INTRA but not LAST.
  44 #define LAST_FRAME_MODE_MASK    ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | \
  45                                  (1 << INTRA_FRAME))
  46 #define GOLDEN_FRAME_MODE_MASK  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | \
  47                                  (1 << INTRA_FRAME))
  48 #define ALT_REF_MODE_MASK       ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | \
  49                                  (1 << INTRA_FRAME))
  50
     // Bit for ALTREF_FRAME plus bit 0 (written as the literal 0x01).
     // NOTE(review): bit 0 presumably corresponds to INTRA_FRAME - confirm.
  51 #define SECOND_REF_FRAME_MASK   ((1 << ALTREF_FRAME) | 0x01)
  52
     // Not referenced in the part of the file reviewed here; presumably the
     // first mode index at which early termination may trigger - confirm.
  53 #define MIN_EARLY_TERM_INDEX    3
  54
     // One candidate for the mode search: a prediction mode together with the
     // pair of reference frames it uses (see the vp9_mode_order table).
  55 typedef struct {
  56   PREDICTION_MODE mode;
  57   MV_REFERENCE_FRAME ref_frame[2];
  58 } MODE_DEFINITION;
  59
     // A pair of reference frames without an associated prediction mode (see
     // the vp9_ref_order table).
  60 typedef struct {
  61   MV_REFERENCE_FRAME ref_frame[2];
  62 } REF_DEFINITION;
  63
     // State shared between txfm_rd_in_plane() and its per-transform-block
     // callback block_rd_txfm(). txfm_rd_in_plane() zeroes the struct, fills in
     // x, best_rd, use_fast_coef_costing, the entropy contexts and so, and reads
     // the this_* totals (or the skip flag) once every block has been visited.
  64 struct rdcost_block_args {
  65   MACROBLOCK *x;
     // Working copies of the above/left entropy contexts, filled by
     // vp9_get_entropy_contexts() and updated by cost_coeffs().
  66   ENTROPY_CONTEXT t_above[16];
  67   ENTROPY_CONTEXT t_left[16];
     // Values for the transform block currently being processed (written by
     // rate_block(), dist_block() and block_rd_txfm()).
  68   int rate;
  69   int64_t dist;
  70   int64_t sse;
     // Running totals over all transform blocks visited so far.
  71   int this_rate;
  72   int64_t this_dist;
  73   int64_t this_sse;
  74   int64_t this_rd;
     // RD budget: once this_rd exceeds it, skip is set and the remaining
     // blocks are not evaluated.
  75   int64_t best_rd;
  76   int skip;
     // Forwarded unchanged to cost_coeffs().
  77   int use_fast_coef_costing;
     // Scan order obtained from get_scan() for the plane's transform size.
  78   const scan_order *so;
  79 };
  80
     // Table of (prediction mode, reference frame pair) candidates, indexed by
     // mode index. 30 entries are listed: inter modes with one reference
     // (second entry NONE), inter modes with two references, and intra modes
     // (INTRA_FRAME, NONE). The table is not referenced in the part of the file
     // reviewed here; NOTE(review): the row order presumably defines the order
     // in which the mode search tries candidates - confirm at the use site.
  81 static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  82   {NEARESTMV, {LAST_FRAME,   NONE}},
  83   {NEARESTMV, {ALTREF_FRAME, NONE}},
  84   {NEARESTMV, {GOLDEN_FRAME, NONE}},
  85
  86   {DC_PRED,   {INTRA_FRAME,  NONE}},
  87
  88   {NEWMV,     {LAST_FRAME,   NONE}},
  89   {NEWMV,     {ALTREF_FRAME, NONE}},
  90   {NEWMV,     {GOLDEN_FRAME, NONE}},
  91
  92   {NEARMV,    {LAST_FRAME,   NONE}},
  93   {NEARMV,    {ALTREF_FRAME, NONE}},
  94   {NEARESTMV, {LAST_FRAME,   ALTREF_FRAME}},
  95   {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
  96
  97   {TM_PRED,   {INTRA_FRAME,  NONE}},
  98
  99   {NEARMV,    {LAST_FRAME,   ALTREF_FRAME}},
 100   {NEWMV,     {LAST_FRAME,   ALTREF_FRAME}},
 101   {NEARMV,    {GOLDEN_FRAME, NONE}},
 102   {NEARMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
 103   {NEWMV,     {GOLDEN_FRAME, ALTREF_FRAME}},
 104
 105   {ZEROMV,    {LAST_FRAME,   NONE}},
 106   {ZEROMV,    {GOLDEN_FRAME, NONE}},
 107   {ZEROMV,    {ALTREF_FRAME, NONE}},
 108   {ZEROMV,    {LAST_FRAME,   ALTREF_FRAME}},
 109   {ZEROMV,    {GOLDEN_FRAME, ALTREF_FRAME}},
 110
 111   {H_PRED,    {INTRA_FRAME,  NONE}},
 112   {V_PRED,    {INTRA_FRAME,  NONE}},
 113   {D135_PRED, {INTRA_FRAME,  NONE}},
 114   {D207_PRED, {INTRA_FRAME,  NONE}},
 115   {D153_PRED, {INTRA_FRAME,  NONE}},
 116   {D63_PRED,  {INTRA_FRAME,  NONE}},
 117   {D117_PRED, {INTRA_FRAME,  NONE}},
 118   {D45_PRED,  {INTRA_FRAME,  NONE}},
 119 };
 120
     // Table of reference-frame pairs, indexed by reference index: the three
     // single references, the two listed compound pairs (each with ALTREF_FRAME
     // as second reference), and finally intra. The table is not referenced in
     // the part of the file reviewed here; NOTE(review): presumably used by the
     // sub-8x8 mode search - confirm at the use site.
 121 static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
 122   {{LAST_FRAME,   NONE}},
 123   {{GOLDEN_FRAME, NONE}},
 124   {{ALTREF_FRAME, NONE}},
 125   {{LAST_FRAME,   ALTREF_FRAME}},
 126   {{GOLDEN_FRAME, ALTREF_FRAME}},
 127   {{INTRA_FRAME,  NONE}},
 128 };
 129
 130 static int raster_block_offset(BLOCK_SIZE plane_bsize,
 131                                int raster_block, int stride) {
 132   const int bw = b_width_log2(plane_bsize);
 133   const int y = 4 * (raster_block >> bw);
 134   const int x = 4 * (raster_block & ((1 << bw) - 1));
 135   return y * stride + x;
 136 }
 137 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
 138                                           int raster_block, int16_t *base) {
 139   const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
 140   return base + raster_block_offset(plane_bsize, raster_block, stride);
 141 }
 142
 143 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
 144                            int m, int n, int min_plane, int max_plane) {
 145   int i;
 146
 147   for (i = min_plane; i < max_plane; ++i) {
 148     struct macroblock_plane *const p = &x->plane[i];
 149     struct macroblockd_plane *const pd = &x->e_mbd.plane[i];
 150
 151     p->coeff    = ctx->coeff_pbuf[i][m];
 152     p->qcoeff   = ctx->qcoeff_pbuf[i][m];
 153     pd->dqcoeff = ctx->dqcoeff_pbuf[i][m];
 154     p->eobs     = ctx->eobs_pbuf[i][m];
 155
 156     ctx->coeff_pbuf[i][m]   = ctx->coeff_pbuf[i][n];
 157     ctx->qcoeff_pbuf[i][m]  = ctx->qcoeff_pbuf[i][n];
 158     ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n];
 159     ctx->eobs_pbuf[i][m]    = ctx->eobs_pbuf[i][n];
 160
 161     ctx->coeff_pbuf[i][n]   = p->coeff;
 162     ctx->qcoeff_pbuf[i][n]  = p->qcoeff;
 163     ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff;
 164     ctx->eobs_pbuf[i][n]    = p->eobs;
 165   }
 166 }
 167
 168 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
 169                             MACROBLOCK *x, MACROBLOCKD *xd,
 170                             int *out_rate_sum, int64_t *out_dist_sum) {
 171   // Note our transform coeffs are 8 times an orthogonal transform.
 172   // Hence quantizer step is also 8 times. To get effective quantizer
 173   // we need to divide by 8 before sending to modeling function.
 174   int i;
 175   int64_t rate_sum = 0;
 176   int64_t dist_sum = 0;
 177   const int ref = xd->mi[0]->mbmi.ref_frame[0];
 178   unsigned int sse;
 179   unsigned int var = 0;
 180   unsigned int sum_sse = 0;
 181   const int shift = 8;
 182   int rate;
 183   int64_t dist;
 184
 185   x->pred_sse[ref] = 0;
 186
 187   for (i = 0; i < MAX_MB_PLANE; ++i) {
 188     struct macroblock_plane *const p = &x->plane[i];
 189     struct macroblockd_plane *const pd = &xd->plane[i];
 190     const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
 191     const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 192     const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
 193     int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
 194     int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
 195     int idx, idy;
 196     int lw = b_width_log2_lookup[unit_size] + 2;
 197     int lh = b_height_log2_lookup[unit_size] + 2;
 198
 199     sum_sse = 0;
 200
 201     for (idy = 0; idy < bh; ++idy) {
 202       for (idx = 0; idx < bw; ++idx) {
 203         uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
 204         uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
 205         int block_idx = (idy << 1) + idx;
 206
 207         var = cpi->fn_ptr[unit_size].vf(src, p->src.stride,
 208                                         dst, pd->dst.stride, &sse);
 209         x->bsse[(i << 2) + block_idx] = sse;
 210         sum_sse += sse;
 211
 212         if (!x->select_tx_size) {
 213           if (x->bsse[(i << 2) + block_idx] < p->quant_thred[0] >> shift)
 214             x->skip_txfm[(i << 2) + block_idx] = 1;
 215           else if (var < p->quant_thred[1] >> shift)
 216             x->skip_txfm[(i << 2) + block_idx] = 2;
 217           else
 218             x->skip_txfm[(i << 2) + block_idx] = 0;
 219         }
 220
 221         if (i == 0)
 222           x->pred_sse[ref] += sse;
 223       }
 224     }
 225
 226     // Fast approximate the modelling function.
 227     if (cpi->oxcf.speed > 4) {
 228       int64_t rate;
 229       int64_t dist;
 230       int64_t square_error = sse;
 231       int quantizer = (pd->dequant[1] >> 3);
 232
 233       if (quantizer < 120)
 234         rate = (square_error * (280 - quantizer)) >> 8;
 235       else
 236         rate = 0;
 237       dist = (square_error * quantizer) >> 8;
 238       rate_sum += rate;
 239       dist_sum += dist;
 240     } else {
 241       vp9_model_rd_from_var_lapndz(sum_sse, 1 << num_pels_log2_lookup[bs],
 242                                    pd->dequant[1] >> 3, &rate, &dist);
 243       rate_sum += rate;
 244       dist_sum += dist;
 245     }
 246   }
 247
 248   *out_rate_sum = (int)rate_sum;
 249   *out_dist_sum = dist_sum << 4;
 250 }
 251
 252 int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
 253                           intptr_t block_size, int64_t *ssz) {
 254   int i;
 255   int64_t error = 0, sqcoeff = 0;
 256
 257   for (i = 0; i < block_size; i++) {
 258     const int diff = coeff[i] - dqcoeff[i];
 259     error +=  diff * diff;
 260     sqcoeff += coeff[i] * coeff[i];
 261   }
 262
 263   *ssz = sqcoeff;
 264   return error;
 265 }
 266
     /* Number of coefficient positions in each successive coefficient band,
      * one row per transform size. cost_coeffs() starts reading at column 1
      * (the DC coefficient is costed separately) and advances to the next
      * column, and to the next band of token costs, each time the current
      * count is used up.
      *
      * The trailing '0' is a terminator which is used inside cost_coeffs() to
      * decide whether to include cost of a trailing EOB node or not (i.e. we
      * can skip this if the last coefficient in this transform block, e.g. the
      * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
      * were non-zero). */
 272 static const int16_t band_counts[TX_SIZES][8] = {
 273   { 1, 2, 3, 4,  3,   16 - 13, 0 },
 274   { 1, 2, 3, 4, 11,   64 - 21, 0 },
 275   { 1, 2, 3, 4, 11,  256 - 21, 0 },
 276   { 1, 2, 3, 4, 11, 1024 - 21, 0 },
 277 };
 278 static INLINE int cost_coeffs(MACROBLOCK *x,
 279                               int plane, int block,
 280                               ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
 281                               TX_SIZE tx_size,
 282                               const int16_t *scan, const int16_t *nb,
 283                               int use_fast_coef_costing) {
 284   MACROBLOCKD *const xd = &x->e_mbd;
 285   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
 286   const struct macroblock_plane *p = &x->plane[plane];
 287   const struct macroblockd_plane *pd = &xd->plane[plane];
 288   const PLANE_TYPE type = pd->plane_type;
 289   const int16_t *band_count = &band_counts[tx_size][1];
 290   const int eob = p->eobs[block];
 291   const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
 292   unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
 293                    x->token_costs[tx_size][type][is_inter_block(mbmi)];
 294   uint8_t token_cache[32 * 32];
 295   int pt = combine_entropy_contexts(*A, *L);
 296   int c, cost;
 297   // Check for consistency of tx_size with mode info
 298   assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
 299                               : get_uv_tx_size(mbmi, pd) == tx_size);
 300
 301   if (eob == 0) {
 302     // single eob token
 303     cost = token_costs[0][0][pt][EOB_TOKEN];
 304     c = 0;
 305   } else {
 306     int band_left = *band_count++;
 307
 308     // dc token
 309     int v = qcoeff[0];
 310     int prev_t = vp9_dct_value_tokens_ptr[v].token;
 311     cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
 312     token_cache[0] = vp9_pt_energy_class[prev_t];
 313     ++token_costs;
 314
 315     // ac tokens
 316     for (c = 1; c < eob; c++) {
 317       const int rc = scan[c];
 318       int t;
 319
 320       v = qcoeff[rc];
 321       t = vp9_dct_value_tokens_ptr[v].token;
 322       if (use_fast_coef_costing) {
 323         cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
 324       } else {
 325         pt = get_coef_context(nb, token_cache, c);
 326         cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
 327         token_cache[rc] = vp9_pt_energy_class[t];
 328       }
 329       prev_t = t;
 330       if (!--band_left) {
 331         band_left = *band_count++;
 332         ++token_costs;
 333       }
 334     }
 335
 336     // eob token
 337     if (band_left) {
 338       if (use_fast_coef_costing) {
 339         cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
 340       } else {
 341         pt = get_coef_context(nb, token_cache, c);
 342         cost += (*token_costs)[0][pt][EOB_TOKEN];
 343       }
 344     }
 345   }
 346
 347   // is eob first coefficient;
 348   *A = *L = (c > 0);
 349
 350   return cost;
 351 }
 352 static void dist_block(int plane, int block, TX_SIZE tx_size,
 353                        struct rdcost_block_args* args) {
 354   const int ss_txfrm_size = tx_size << 1;
 355   MACROBLOCK* const x = args->x;
 356   MACROBLOCKD* const xd = &x->e_mbd;
 357   const struct macroblock_plane *const p = &x->plane[plane];
 358   const struct macroblockd_plane *const pd = &xd->plane[plane];
 359   int64_t this_sse;
 360   int shift = tx_size == TX_32X32 ? 0 : 2;
 361   tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
 362   tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
 363   args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
 364                                &this_sse) >> shift;
 365   args->sse  = this_sse >> shift;
 366
 367   if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
 368     // TODO(jingning): tune the model to better capture the distortion.
 369     int64_t p = (pd->dequant[1] * pd->dequant[1] *
 370                     (1 << ss_txfrm_size)) >> (shift + 2);
 371     args->dist += (p >> 4);
 372     args->sse  += p;
 373   }
 374 }
 375
 376 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
 377                        TX_SIZE tx_size, struct rdcost_block_args* args) {
 378   int x_idx, y_idx;
 379   txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
 380
 381   args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
 382                            args->t_left + y_idx, tx_size,
 383                            args->so->scan, args->so->neighbors,
 384                            args->use_fast_coef_costing);
 385 }
 386
 387 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
 388                           TX_SIZE tx_size, void *arg) {
 389   struct rdcost_block_args *args = arg;
 390   MACROBLOCK *const x = args->x;
 391   MACROBLOCKD *const xd = &x->e_mbd;
 392   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 393   int64_t rd1, rd2, rd;
 394
 395   if (args->skip)
 396     return;
 397
 398   if (!is_inter_block(mbmi)) {
 399     vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
 400     dist_block(plane, block, tx_size, args);
 401   } else if (max_txsize_lookup[plane_bsize] == tx_size) {
 402     if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 0) {
 403       // full forward transform and quantization
 404       vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 405       dist_block(plane, block, tx_size, args);
 406     } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == 2) {
 407       // compute DC coefficient
 408       tran_low_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
 409       tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
 410       vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
 411       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
 412       args->dist = args->sse;
 413       if (!x->plane[plane].eobs[block])
 414         args->dist = args->sse - ((coeff[0] * coeff[0] -
 415             (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
 416     } else {
 417       // skip forward transform
 418       x->plane[plane].eobs[block] = 0;
 419       args->sse  = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
 420       args->dist = args->sse;
 421     }
 422   } else {
 423     // full forward transform and quantization
 424     vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
 425     dist_block(plane, block, tx_size, args);
 426   }
 427
 428   rate_block(plane, block, plane_bsize, tx_size, args);
 429   rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
 430   rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
 431
 432   // TODO(jingning): temporarily enabled only for luma component
 433   rd = MIN(rd1, rd2);
 434   if (plane == 0)
 435     x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
 436                                     (rd1 > rd2 && !xd->lossless);
 437
 438   args->this_rate += args->rate;
 439   args->this_dist += args->dist;
 440   args->this_sse  += args->sse;
 441   args->this_rd += rd;
 442
 443   if (args->this_rd > args->best_rd) {
 444     args->skip = 1;
 445     return;
 446   }
 447 }
 448
 449 static void txfm_rd_in_plane(MACROBLOCK *x,
 450                              int *rate, int64_t *distortion,
 451                              int *skippable, int64_t *sse,
 452                              int64_t ref_best_rd, int plane,
 453                              BLOCK_SIZE bsize, TX_SIZE tx_size,
 454                              int use_fast_coef_casting) {
 455   MACROBLOCKD *const xd = &x->e_mbd;
 456   const struct macroblockd_plane *const pd = &xd->plane[plane];
 457   struct rdcost_block_args args;
 458   vp9_zero(args);
 459   args.x = x;
 460   args.best_rd = ref_best_rd;
 461   args.use_fast_coef_costing = use_fast_coef_casting;
 462
 463   if (plane == 0)
 464     xd->mi[0]->mbmi.tx_size = tx_size;
 465
 466   vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
 467
 468   args.so = get_scan(xd, tx_size, pd->plane_type, 0);
 469
 470   vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
 471                                          block_rd_txfm, &args);
 472   if (args.skip) {
 473     *rate       = INT_MAX;
 474     *distortion = INT64_MAX;
 475     *sse        = INT64_MAX;
 476     *skippable  = 0;
 477   } else {
 478     *distortion = args.this_dist;
 479     *rate       = args.this_rate;
 480     *sse        = args.this_sse;
 481     *skippable  = vp9_is_skippable_in_plane(x, bsize, plane);
 482   }
 483 }
 484
 485 static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x,
 486                                    int *rate, int64_t *distortion,
 487                                    int *skip, int64_t *sse,
 488                                    int64_t ref_best_rd,
 489                                    BLOCK_SIZE bs) {
 490   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 491   VP9_COMMON *const cm = &cpi->common;
 492   const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 493   MACROBLOCKD *const xd = &x->e_mbd;
 494   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 495
 496   mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
 497
 498   txfm_rd_in_plane(x, rate, distortion, skip,
 499                    sse, ref_best_rd, 0, bs,
 500                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 501 }
 502
 503 static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
 504                                    int *rate,
 505                                    int64_t *distortion,
 506                                    int *skip,
 507                                    int64_t *psse,
 508                                    int64_t tx_cache[TX_MODES],
 509                                    int64_t ref_best_rd,
 510                                    BLOCK_SIZE bs) {
 511   const TX_SIZE max_tx_size = max_txsize_lookup[bs];
 512   VP9_COMMON *const cm = &cpi->common;
 513   MACROBLOCKD *const xd = &x->e_mbd;
 514   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 515   vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
 516   int r[TX_SIZES][2], s[TX_SIZES];
 517   int64_t d[TX_SIZES], sse[TX_SIZES];
 518   int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
 519                              {INT64_MAX, INT64_MAX},
 520                              {INT64_MAX, INT64_MAX},
 521                              {INT64_MAX, INT64_MAX}};
 522   int n, m;
 523   int s0, s1;
 524   const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
 525   int64_t best_rd = INT64_MAX;
 526   TX_SIZE best_tx = max_tx_size;
 527
 528   const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
 529   assert(skip_prob > 0);
 530   s0 = vp9_cost_bit(skip_prob, 0);
 531   s1 = vp9_cost_bit(skip_prob, 1);
 532
 533   for (n = max_tx_size; n >= 0;  n--) {
 534     txfm_rd_in_plane(x, &r[n][0], &d[n], &s[n],
 535                      &sse[n], ref_best_rd, 0, bs, n,
 536                      cpi->sf.use_fast_coef_costing);
 537     r[n][1] = r[n][0];
 538     if (r[n][0] < INT_MAX) {
 539       for (m = 0; m <= n - (n == (int) max_tx_size); m++) {
 540         if (m == n)
 541           r[n][1] += vp9_cost_zero(tx_probs[m]);
 542         else
 543           r[n][1] += vp9_cost_one(tx_probs[m]);
 544       }
 545     }
 546     if (d[n] == INT64_MAX) {
 547       rd[n][0] = rd[n][1] = INT64_MAX;
 548     } else if (s[n]) {
 549       rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
 550     } else {
 551       rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
 552       rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
 553     }
 554
 555     // Early termination in transform size search.
 556     if (cpi->sf.tx_size_search_breakout &&
 557         (rd[n][1] == INT64_MAX ||
 558         (n < (int) max_tx_size && rd[n][1] > rd[n + 1][1]) ||
 559         s[n] == 1))
 560       break;
 561
 562     if (rd[n][1] < best_rd) {
 563       best_tx = n;
 564       best_rd = rd[n][1];
 565     }
 566   }
 567   mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
 568                       best_tx : MIN(max_tx_size, max_mode_tx_size);
 569
 570
 571   *distortion = d[mbmi->tx_size];
 572   *rate       = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
 573   *skip       = s[mbmi->tx_size];
 574   *psse       = sse[mbmi->tx_size];
 575
 576   tx_cache[ONLY_4X4] = rd[TX_4X4][0];
 577   tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
 578   tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
 579   tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
 580
 581   if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
 582     tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
 583   } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
 584     tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
 585   } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
 586     tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
 587   } else {
 588     tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
 589   }
 590 }
 591
 592 static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
 593                             int64_t *distortion, int *skip,
 594                             int64_t *psse, BLOCK_SIZE bs,
 595                             int64_t txfm_cache[TX_MODES],
 596                             int64_t ref_best_rd) {
 597   MACROBLOCKD *xd = &x->e_mbd;
 598   int64_t sse;
 599   int64_t *ret_sse = psse ? psse : &sse;
 600
 601   assert(bs == xd->mi[0]->mbmi.sb_type);
 602
 603   if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) {
 604     vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
 605     choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd,
 606                            bs);
 607   } else {
 608     choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse,
 609                            txfm_cache, ref_best_rd, bs);
 610   }
 611 }
 612
 613 static int conditional_skipintra(PREDICTION_MODE mode,
 614                                  PREDICTION_MODE best_intra_mode) {
 615   if (mode == D117_PRED &&
 616       best_intra_mode != V_PRED &&
 617       best_intra_mode != D135_PRED)
 618     return 1;
 619   if (mode == D63_PRED &&
 620       best_intra_mode != V_PRED &&
 621       best_intra_mode != D45_PRED)
 622     return 1;
 623   if (mode == D207_PRED &&
 624       best_intra_mode != H_PRED &&
 625       best_intra_mode != D45_PRED)
 626     return 1;
 627   if (mode == D153_PRED &&
 628       best_intra_mode != H_PRED &&
 629       best_intra_mode != D135_PRED)
 630     return 1;
 631   return 0;
 632 }
 633
 634 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
 635                                      PREDICTION_MODE *best_mode,
 636                                      const int *bmode_costs,
 637                                      ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
 638                                      int *bestrate, int *bestratey,
 639                                      int64_t *bestdistortion,
 640                                      BLOCK_SIZE bsize, int64_t rd_thresh) {
 641   PREDICTION_MODE mode;
 642   MACROBLOCKD *const xd = &x->e_mbd;
 643   int64_t best_rd = rd_thresh;
 644
 645   struct macroblock_plane *p = &x->plane[0];
 646   struct macroblockd_plane *pd = &xd->plane[0];
 647   const int src_stride = p->src.stride;
 648   const int dst_stride = pd->dst.stride;
 649   const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
 650                                                             src_stride)];
 651   uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
 652                                                        dst_stride)];
 653   ENTROPY_CONTEXT ta[2], tempa[2];
 654   ENTROPY_CONTEXT tl[2], templ[2];
 655
 656   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
 657   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
 658   int idx, idy;
 659   uint8_t best_dst[8 * 8];
 660
 661   assert(ib < 4);
 662
 663   vpx_memcpy(ta, a, sizeof(ta));
 664   vpx_memcpy(tl, l, sizeof(tl));
 665   xd->mi[0]->mbmi.tx_size = TX_4X4;
 666
 667   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
 668     int64_t this_rd;
 669     int ratey = 0;
 670     int64_t distortion = 0;
 671     int rate = bmode_costs[mode];
 672
 673     if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
 674       continue;
 675
 676     // Only do the oblique modes if the best so far is
 677     // one of the neighboring directional modes
 678     if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
 679       if (conditional_skipintra(mode, *best_mode))
 680           continue;
 681     }
 682
 683     vpx_memcpy(tempa, ta, sizeof(ta));
 684     vpx_memcpy(templ, tl, sizeof(tl));
 685
 686     for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
 687       for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
 688         const int block = ib + idy * 2 + idx;
 689         const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
 690         uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
 691         int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
 692                                                             p->src_diff);
 693         tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
 694         xd->mi[0]->bmi[block].as_mode = mode;
 695         vp9_predict_intra_block(xd, block, 1,
 696                                 TX_4X4, mode,
 697                                 x->skip_encode ? src : dst,
 698                                 x->skip_encode ? src_stride : dst_stride,
 699                                 dst, dst_stride, idx, idy, 0);
 700         vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
 701
 702         if (xd->lossless) {
 703           const scan_order *so = &vp9_default_scan_orders[TX_4X4];
 704           vp9_fwht4x4(src_diff, coeff, 8);
 705           vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 706           ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
 707                                so->scan, so->neighbors,
 708                                cpi->sf.use_fast_coef_costing);
 709           if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
 710             goto next;
 711           vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
 712                           p->eobs[block]);
 713         } else {
 714           int64_t unused;
 715           const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
 716           const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
 717           vp9_fht4x4(src_diff, coeff, 8, tx_type);
 718           vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
 719           ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
 720                              so->scan, so->neighbors,
 721                              cpi->sf.use_fast_coef_costing);
 722           distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
 723                                         16, &unused) >> 2;
 724           if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
 725             goto next;
 726           vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
 727                          dst, dst_stride, p->eobs[block]);
 728         }
 729       }
 730     }
 731
 732     rate += ratey;
 733     this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
 734
 735     if (this_rd < best_rd) {
 736       *bestrate = rate;
 737       *bestratey = ratey;
 738       *bestdistortion = distortion;
 739       best_rd = this_rd;
 740       *best_mode = mode;
 741       vpx_memcpy(a, tempa, sizeof(tempa));
 742       vpx_memcpy(l, templ, sizeof(templ));
 743       for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
 744         vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
 745                    num_4x4_blocks_wide * 4);
 746     }
 747   next:
 748     {}
 749   }
 750
 751   if (best_rd >= rd_thresh || x->skip_encode)
 752     return best_rd;
 753
 754   for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
 755     vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
 756                num_4x4_blocks_wide * 4);
 757
 758   return best_rd;
 759 }
 760
 761 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
 762                                             int *rate, int *rate_y,
 763                                             int64_t *distortion,
 764                                             int64_t best_rd) {
 765   int i, j;
 766   const MACROBLOCKD *const xd = &mb->e_mbd;
 767   MODE_INFO *const mic = xd->mi[0];
 768   const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
 769   const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
 770   const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
 771   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
 772   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
 773   int idx, idy;
 774   int cost = 0;
 775   int64_t total_distortion = 0;
 776   int tot_rate_y = 0;
 777   int64_t total_rd = 0;
 778   ENTROPY_CONTEXT t_above[4], t_left[4];
 779   const int *bmode_costs = cpi->mbmode_cost;
 780
 781   vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
 782   vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
 783
 784   // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
 785   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
 786     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
 787       PREDICTION_MODE best_mode = DC_PRED;
 788       int r = INT_MAX, ry = INT_MAX;
 789       int64_t d = INT64_MAX, this_rd = INT64_MAX;
 790       i = idy * 2 + idx;
 791       if (cpi->common.frame_type == KEY_FRAME) {
 792         const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
 793         const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
 794
 795         bmode_costs  = cpi->y_mode_costs[A][L];
 796       }
 797
 798       this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
 799                                       t_above + idx, t_left + idy, &r, &ry, &d,
 800                                       bsize, best_rd - total_rd);
 801       if (this_rd >= best_rd - total_rd)
 802         return INT64_MAX;
 803
 804       total_rd += this_rd;
 805       cost += r;
 806       total_distortion += d;
 807       tot_rate_y += ry;
 808
 809       mic->bmi[i].as_mode = best_mode;
 810       for (j = 1; j < num_4x4_blocks_high; ++j)
 811         mic->bmi[i + j * 2].as_mode = best_mode;
 812       for (j = 1; j < num_4x4_blocks_wide; ++j)
 813         mic->bmi[i + j].as_mode = best_mode;
 814
 815       if (total_rd >= best_rd)
 816         return INT64_MAX;
 817     }
 818   }
 819
 820   *rate = cost;
 821   *rate_y = tot_rate_y;
 822   *distortion = total_distortion;
 823   mic->mbmi.mode = mic->bmi[3].as_mode;
 824
 825   return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
 826 }
 827
 828 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
 829                                       int *rate, int *rate_tokenonly,
 830                                       int64_t *distortion, int *skippable,
 831                                       BLOCK_SIZE bsize,
 832                                       int64_t tx_cache[TX_MODES],
 833                                       int64_t best_rd) {
 834   PREDICTION_MODE mode;
 835   PREDICTION_MODE mode_selected = DC_PRED;
 836   MACROBLOCKD *const xd = &x->e_mbd;
 837   MODE_INFO *const mic = xd->mi[0];
 838   int this_rate, this_rate_tokenonly, s;
 839   int64_t this_distortion, this_rd;
 840   TX_SIZE best_tx = TX_4X4;
 841   int i;
 842   int *bmode_costs = cpi->mbmode_cost;
 843
 844   if (cpi->sf.tx_size_search_method == USE_FULL_RD)
 845     for (i = 0; i < TX_MODES; i++)
 846       tx_cache[i] = INT64_MAX;
 847
 848   /* Y Search for intra prediction mode */
 849   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
 850     int64_t local_tx_cache[TX_MODES];
 851     MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
 852     MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
 853
 854     if (cpi->common.frame_type == KEY_FRAME) {
 855       const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
 856       const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
 857
 858       bmode_costs = cpi->y_mode_costs[A][L];
 859     }
 860     mic->mbmi.mode = mode;
 861
 862     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
 863         &s, NULL, bsize, local_tx_cache, best_rd);
 864
 865     if (this_rate_tokenonly == INT_MAX)
 866       continue;
 867
 868     this_rate = this_rate_tokenonly + bmode_costs[mode];
 869     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 870
 871     if (this_rd < best_rd) {
 872       mode_selected   = mode;
 873       best_rd         = this_rd;
 874       best_tx         = mic->mbmi.tx_size;
 875       *rate           = this_rate;
 876       *rate_tokenonly = this_rate_tokenonly;
 877       *distortion     = this_distortion;
 878       *skippable      = s;
 879     }
 880
 881     if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
 882       for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
 883         const int64_t adj_rd = this_rd + local_tx_cache[i] -
 884             local_tx_cache[cpi->common.tx_mode];
 885         if (adj_rd < tx_cache[i]) {
 886           tx_cache[i] = adj_rd;
 887         }
 888       }
 889     }
 890   }
 891
 892   mic->mbmi.mode = mode_selected;
 893   mic->mbmi.tx_size = best_tx;
 894
 895   return best_rd;
 896 }
 897
 898 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
 899                              int *rate, int64_t *distortion, int *skippable,
 900                              int64_t *sse, BLOCK_SIZE bsize,
 901                              int64_t ref_best_rd) {
 902   MACROBLOCKD *const xd = &x->e_mbd;
 903   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
 904   const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
 905   int plane;
 906   int pnrate = 0, pnskip = 1;
 907   int64_t pndist = 0, pnsse = 0;
 908
 909   if (ref_best_rd < 0)
 910     goto term;
 911
 912   if (is_inter_block(mbmi)) {
 913     int plane;
 914     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
 915       vp9_subtract_plane(x, bsize, plane);
 916   }
 917
 918   *rate = 0;
 919   *distortion = 0;
 920   *sse = 0;
 921   *skippable = 1;
 922
 923   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
 924     txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
 925                      ref_best_rd, plane, bsize, uv_tx_size,
 926                      cpi->sf.use_fast_coef_costing);
 927     if (pnrate == INT_MAX)
 928       goto term;
 929     *rate += pnrate;
 930     *distortion += pndist;
 931     *sse += pnsse;
 932     *skippable &= pnskip;
 933   }
 934   return;
 935
 936   term:
 937   *rate = INT_MAX;
 938   *distortion = INT64_MAX;
 939   *sse = INT64_MAX;
 940   *skippable = 0;
 941   return;
 942 }
 943
 944 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
 945                                        PICK_MODE_CONTEXT *ctx,
 946                                        int *rate, int *rate_tokenonly,
 947                                        int64_t *distortion, int *skippable,
 948                                        BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
 949   MACROBLOCKD *xd = &x->e_mbd;
 950   PREDICTION_MODE mode;
 951   PREDICTION_MODE mode_selected = DC_PRED;
 952   int64_t best_rd = INT64_MAX, this_rd;
 953   int this_rate_tokenonly, this_rate, s;
 954   int64_t this_distortion, this_sse;
 955
 956   for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
 957     if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
 958       continue;
 959
 960     xd->mi[0]->mbmi.uv_mode = mode;
 961
 962     super_block_uvrd(cpi, x, &this_rate_tokenonly,
 963                      &this_distortion, &s, &this_sse, bsize, best_rd);
 964     if (this_rate_tokenonly == INT_MAX)
 965       continue;
 966     this_rate = this_rate_tokenonly +
 967                 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
 968     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
 969
 970     if (this_rd < best_rd) {
 971       mode_selected   = mode;
 972       best_rd         = this_rd;
 973       *rate           = this_rate;
 974       *rate_tokenonly = this_rate_tokenonly;
 975       *distortion     = this_distortion;
 976       *skippable      = s;
 977       if (!x->select_tx_size)
 978         swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
 979     }
 980   }
 981
 982   xd->mi[0]->mbmi.uv_mode = mode_selected;
 983   return best_rd;
 984 }
 985
 986 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
 987                               int *rate, int *rate_tokenonly,
 988                               int64_t *distortion, int *skippable,
 989                               BLOCK_SIZE bsize) {
 990   const VP9_COMMON *cm = &cpi->common;
 991   int64_t unused;
 992
 993   x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
 994   super_block_uvrd(cpi, x, rate_tokenonly, distortion,
 995                    skippable, &unused, bsize, INT64_MAX);
 996   *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
 997   return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
 998 }
 999
1000 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1001                                  BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1002                                  int *rate_uv, int *rate_uv_tokenonly,
1003                                  int64_t *dist_uv, int *skip_uv,
1004                                  PREDICTION_MODE *mode_uv) {
1005   MACROBLOCK *const x = &cpi->mb;
1006
1007   // Use an estimated rd for uv_intra based on DC_PRED if the
1008   // appropriate speed flag is set.
1009   if (cpi->sf.use_uv_intra_rd_estimate) {
1010     rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1011                    skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1012   // Else do a proper rd search for each possible transform size that may
1013   // be considered in the main rd loop.
1014   } else {
1015     rd_pick_intra_sbuv_mode(cpi, x, ctx,
1016                             rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1017                             bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1018   }
1019   *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
1020 }
1021
1022 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1023                        int mode_context) {
1024   assert(is_inter_mode(mode));
1025   return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1026 }
1027
1028 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1029                                 BLOCK_SIZE bsize,
1030                                 int_mv *frame_mv,
1031                                 int mi_row, int mi_col,
1032                                 int_mv single_newmv[MAX_REF_FRAMES],
1033                                 int *rate_mv);  // Forward declaration only: needed because rd_pick_best_sub8x8_mode() below calls it; the body is not in this part of the file.
1034
1035 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1036                                 PREDICTION_MODE mode, int_mv this_mv[2],
1037                                 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1038                                 int_mv seg_mvs[MAX_REF_FRAMES],
1039                                 int_mv *best_ref_mv[2], const int *mvjcost,
1040                                 int *mvcost[2]) {
1041   MODE_INFO *const mic = xd->mi[0];
1042   const MB_MODE_INFO *const mbmi = &mic->mbmi;
1043   int thismvcost = 0;
1044   int idx, idy;
1045   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1046   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1047   const int is_compound = has_second_ref(mbmi);
1048
1049   switch (mode) {
1050     case NEWMV:
1051       this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1052       thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1053                                     mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1054       if (is_compound) {
1055         this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1056         thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1057                                       mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1058       }
1059       break;
1060     case NEARMV:
1061     case NEARESTMV:
1062       this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
1063       if (is_compound)
1064         this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
1065       break;
1066     case ZEROMV:
1067       this_mv[0].as_int = 0;
1068       if (is_compound)
1069         this_mv[1].as_int = 0;
1070       break;
1071     default:
1072       break;
1073   }
1074
1075   mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1076   if (is_compound)
1077     mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1078
1079   mic->bmi[i].as_mode = mode;
1080
1081   for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1082     for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1083       vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1084                  &mic->bmi[i], sizeof(mic->bmi[i]));
1085
1086   return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1087             thismvcost;
1088 }
1089
1090 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1091                                        MACROBLOCK *x,
1092                                        int64_t best_yrd,
1093                                        int i,
1094                                        int *labelyrate,
1095                                        int64_t *distortion, int64_t *sse,
1096                                        ENTROPY_CONTEXT *ta,
1097                                        ENTROPY_CONTEXT *tl,
1098                                        int mi_row, int mi_col) {
1099   int k;
1100   MACROBLOCKD *xd = &x->e_mbd;
1101   struct macroblockd_plane *const pd = &xd->plane[0];
1102   struct macroblock_plane *const p = &x->plane[0];
1103   MODE_INFO *const mi = xd->mi[0];
1104   const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1105   const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1106   const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1107   int idx, idy;
1108
1109   const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1110                                                              p->src.stride)];
1111   uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1112                                                         pd->dst.stride)];
1113   int64_t thisdistortion = 0, thissse = 0;
1114   int thisrate = 0, ref;
1115   const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1116   const int is_compound = has_second_ref(&mi->mbmi);
1117   const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1118
1119   for (ref = 0; ref < 1 + is_compound; ++ref) {
1120     const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1121                                                pd->pre[ref].stride)];
1122     vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1123                               dst, pd->dst.stride,
1124                               &mi->bmi[i].as_mv[ref].as_mv,
1125                               &xd->block_refs[ref]->sf, width, height, ref,
1126                               kernel, MV_PRECISION_Q3,
1127                               mi_col * MI_SIZE + 4 * (i % 2),
1128                               mi_row * MI_SIZE + 4 * (i / 2));
1129   }
1130
1131   vp9_subtract_block(height, width,
1132                      raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1133                      src, p->src.stride,
1134                      dst, pd->dst.stride);
1135
1136   k = i;
1137   for (idy = 0; idy < height / 4; ++idy) {
1138     for (idx = 0; idx < width / 4; ++idx) {
1139       int64_t ssz, rd, rd1, rd2;
1140       tran_low_t* coeff;
1141
1142       k += (idy * 2 + idx);
1143       coeff = BLOCK_OFFSET(p->coeff, k);
1144       x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1145                     coeff, 8);
1146       vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1147       thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1148                                         16, &ssz);
1149       thissse += ssz;
1150       thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1151                               so->scan, so->neighbors,
1152                               cpi->sf.use_fast_coef_costing);
1153       rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1154       rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1155       rd = MIN(rd1, rd2);
1156       if (rd >= best_yrd)
1157         return INT64_MAX;
1158     }
1159   }
1160
1161   *distortion = thisdistortion >> 2;
1162   *labelyrate = thisrate;
1163   *sse = thissse >> 2;
1164
1165   return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1166 }
1167
1168 typedef struct {  // RD statistics of one (sub-block, inter mode) pair, filled in by rd_pick_best_sub8x8_mode().
1169   int eobs;  // copied from p->eobs[] after the sub-block has been encoded
1170   int brate;  // rate from set_and_cost_bmi_mvs(); byrate is added to it once the sub-block has been encoded
1171   int byrate;  // rate reported by encode_inter_mb_segment() through labelyrate
1172   int64_t bdist;  // distortion reported by encode_inter_mb_segment()
1173   int64_t bsse;  // sse reported by encode_inter_mb_segment()
1174   int64_t brdcost;  // RD cost of the sub-block for this mode; INT64_MAX marks "no valid result"
1175   int_mv mvs[2];  // motion vector(s) evaluated for this mode, one per reference
1176   ENTROPY_CONTEXT ta[2];  // above entropy context handed to encode_inter_mb_segment()
1177   ENTROPY_CONTEXT tl[2];  // left entropy context handed to encode_inter_mb_segment()
1178 } SEG_RDSTAT;
1179
1180 typedef struct {  // Working state of rd_pick_best_sub8x8_mode(); the caller passes an array indexed by filter_idx.
1181   int_mv *ref_mv[2];  // set to best_ref_mv / second_best_ref_mv; used for mv costing and the NEWMV search
1182   int_mv mvp;  // mv predictor for the NEWMV search; starts as best_ref_mv
1183
1184   int64_t segment_rd;  // initialised to the caller's best_rd; bounds the per-sub-block RD budget
1185   int r;  // NOTE(review): presumably the total rate of the best segmentation - not written in the code visible here, confirm
1186   int64_t d;  // NOTE(review): presumably the total distortion - confirm against the rest of rd_pick_best_sub8x8_mode()
1187   int64_t sse;  // NOTE(review): presumably the total sse - confirm
1188   int segment_yrate;  // NOTE(review): presumably the total luma rate - confirm
1189   PREDICTION_MODE modes[4];  // mode per 4x4 position; initialised to ZEROMV
1190   SEG_RDSTAT rdstat[4][INTER_MODES];  // per sub-block, per inter mode statistics (second index is INTER_OFFSET(mode))
1191   int mvthresh;  // caller-supplied threshold; divided by the label count to gate the NEWMV search
1192 } BEST_SEG_INFO;
1193
1194 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1195   return (mv->row >> 3) < x->mv_row_min ||
1196          (mv->row >> 3) > x->mv_row_max ||
1197          (mv->col >> 3) < x->mv_col_min ||
1198          (mv->col >> 3) > x->mv_col_max;
1199 }
1200
1201 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1202   MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
1203   struct macroblock_plane *const p = &x->plane[0];
1204   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1205
1206   p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1207   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1208   pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1209                                                        pd->pre[0].stride)];
1210   if (has_second_ref(mbmi))
1211     pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1212                                                          pd->pre[1].stride)];
1213 }
1214
1215 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1216                                   struct buf_2d orig_pre[2]) {
1217   MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
1218   x->plane[0].src = orig_src;
1219   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1220   if (has_second_ref(mbmi))
1221     x->e_mbd.plane[0].pre[1] = orig_pre[1];
1222 }
1223
1224 static INLINE int mv_has_subpel(const MV *mv) {
1225   return (mv->row & 0x0F) || (mv->col & 0x0F);
1226 }
1227
1228 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
1229 // TODO(aconverse): Find out if this is still productive then clean up or remove
1230 static int check_best_zero_mv(
1231     const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
1232     int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode,
1233     const MV_REFERENCE_FRAME ref_frames[2]) {
1234   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1235       frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1236       (ref_frames[1] == NONE ||
1237        frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1238     int rfc = mode_context[ref_frames[0]];
1239     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1240     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1241     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1242
1243     if (this_mode == NEARMV) {
1244       if (c1 > c3) return 0;
1245     } else if (this_mode == NEARESTMV) {
1246       if (c2 > c3) return 0;
1247     } else {
1248       assert(this_mode == ZEROMV);
1249       if (ref_frames[1] == NONE) {
1250         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1251             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1252           return 0;
1253       } else {
1254         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1255              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1256             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1257              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1258           return 0;
1259       }
1260     }
1261   }
1262   return 1;
1263 }
1264
1265 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
1266                                         const TileInfo * const tile,
1267                                         int_mv *best_ref_mv,
1268                                         int_mv *second_best_ref_mv,
1269                                         int64_t best_rd, int *returntotrate,
1270                                         int *returnyrate,
1271                                         int64_t *returndistortion,
1272                                         int *skippable, int64_t *psse,
1273                                         int mvthresh,
1274                                         int_mv seg_mvs[4][MAX_REF_FRAMES],
1275                                         BEST_SEG_INFO *bsi_buf, int filter_idx,
1276                                         int mi_row, int mi_col) {
1277   int i;
1278   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1279   MACROBLOCKD *xd = &x->e_mbd;
1280   MODE_INFO *mi = xd->mi[0];
1281   MB_MODE_INFO *mbmi = &mi->mbmi;
1282   int mode_idx;
1283   int k, br = 0, idx, idy;
1284   int64_t bd = 0, block_sse = 0;
1285   PREDICTION_MODE this_mode;
1286   VP9_COMMON *cm = &cpi->common;
1287   struct macroblock_plane *const p = &x->plane[0];
1288   struct macroblockd_plane *const pd = &xd->plane[0];
1289   const int label_count = 4;
1290   int64_t this_segment_rd = 0;
1291   int label_mv_thresh;
1292   int segmentyrate = 0;
1293   const BLOCK_SIZE bsize = mbmi->sb_type;
1294   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1295   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1296   ENTROPY_CONTEXT t_above[2], t_left[2];
1297   int subpelmv = 1, have_ref = 0;
1298   const int has_second_rf = has_second_ref(mbmi);
1299   const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize];
1300
1301   vp9_zero(*bsi);
1302
1303   bsi->segment_rd = best_rd;
1304   bsi->ref_mv[0] = best_ref_mv;
1305   bsi->ref_mv[1] = second_best_ref_mv;
1306   bsi->mvp.as_int = best_ref_mv->as_int;
1307   bsi->mvthresh = mvthresh;
1308
1309   for (i = 0; i < 4; i++)
1310     bsi->modes[i] = ZEROMV;
1311
1312   vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1313   vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1314
1315   // 64 makes this threshold really big effectively
1316   // making it so that we very rarely check mvs on
1317   // segments.   setting this to 1 would make mv thresh
1318   // roughly equal to what it is for macroblocks
1319   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1320
1321   // Segmentation method overheads
1322   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1323     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1324       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1325       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1326       int_mv mode_mv[MB_MODE_COUNT][2];
1327       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1328       PREDICTION_MODE mode_selected = ZEROMV;
1329       int64_t best_rd = INT64_MAX;
1330       const int i = idy * 2 + idx;
1331       int ref;
1332
1333       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1334         const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1335         frame_mv[ZEROMV][frame].as_int = 0;
1336         vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1337                                       &frame_mv[NEARESTMV][frame],
1338                                       &frame_mv[NEARMV][frame]);
1339       }
1340
1341       // search for the best motion vector on this segment
1342       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1343         const struct buf_2d orig_src = x->plane[0].src;
1344         struct buf_2d orig_pre[2];
1345
1346         mode_idx = INTER_OFFSET(this_mode);
1347         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1348         if (!(inter_mode_mask & (1 << this_mode)))
1349           continue;
1350
1351         if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1352                                 this_mode, mbmi->ref_frame))
1353           continue;
1354
1355         vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1356         vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1357                    sizeof(bsi->rdstat[i][mode_idx].ta));
1358         vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1359                    sizeof(bsi->rdstat[i][mode_idx].tl));
1360
1361         // motion search for newmv (single predictor case only)
1362         if (!has_second_rf && this_mode == NEWMV &&
1363             seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1364           MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1365           int step_param = 0;
1366           int thissme, bestsme = INT_MAX;
1367           int sadpb = x->sadperbit4;
1368           MV mvp_full;
1369           int max_mv;
1370           int sad_list[5];
1371
1372           /* Is the best so far sufficiently good that we cant justify doing
1373            * and new motion search. */
1374           if (best_rd < label_mv_thresh)
1375             break;
1376
1377           if (cpi->oxcf.mode != BEST) {
1378             // use previous block's result as next block's MV predictor.
1379             if (i > 0) {
1380               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1381               if (i == 2)
1382                 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1383             }
1384           }
1385           if (i == 0)
1386             max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1387           else
1388             max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1389
1390           if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1391             // Take wtd average of the step_params based on the last frame's
1392             // max mv magnitude and the best ref mvs of the current block for
1393             // the given reference.
1394             step_param = (vp9_init_search_range(max_mv) +
1395                               cpi->mv_step_param) / 2;
1396           } else {
1397             step_param = cpi->mv_step_param;
1398           }
1399
1400           mvp_full.row = bsi->mvp.as_mv.row >> 3;
1401           mvp_full.col = bsi->mvp.as_mv.col >> 3;
1402
1403           if (cpi->sf.adaptive_motion_search) {
1404             mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3;
1405             mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3;
1406             step_param = MAX(step_param, 8);
1407           }
1408
1409           // adjust src pointer for this block
1410           mi_buf_shift(x, i);
1411
1412           vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1413
1414           bestsme = vp9_full_pixel_search(
1415               cpi, x, bsize, &mvp_full, step_param, sadpb,
1416               cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? sad_list : NULL,
1417               &bsi->ref_mv[0]->as_mv, new_mv,
1418               INT_MAX, 1);
1419
1420           // Should we do a full search (best quality only)
1421           if (cpi->oxcf.mode == BEST) {
1422             int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1423             /* Check if mvp_full is within the range. */
1424             clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1425                      x->mv_row_min, x->mv_row_max);
1426             thissme = cpi->full_search_sad(x, &mvp_full,
1427                                            sadpb, 16, &cpi->fn_ptr[bsize],
1428                                            &bsi->ref_mv[0]->as_mv,
1429                                            &best_mv->as_mv);
1430             sad_list[1] = sad_list[2] = sad_list[3] = sad_list[4] = INT_MAX;
1431             if (thissme < bestsme) {
1432               bestsme = thissme;
1433               *new_mv = best_mv->as_mv;
1434             } else {
1435               // The full search result is actually worse so re-instate the
1436               // previous best vector
1437               best_mv->as_mv = *new_mv;
1438             }
1439           }
1440
1441           if (bestsme < INT_MAX) {
1442             int distortion;
1443             cpi->find_fractional_mv_step(
1444                 x,
1445                 new_mv,
1446                 &bsi->ref_mv[0]->as_mv,
1447                 cm->allow_high_precision_mv,
1448                 x->errorperbit, &cpi->fn_ptr[bsize],
1449                 cpi->sf.mv.subpel_force_stop,
1450                 cpi->sf.mv.subpel_iters_per_step,
1451                 cond_sad_list(cpi, sad_list),
1452                 x->nmvjointcost, x->mvcost,
1453                 &distortion,
1454                 &x->pred_sse[mbmi->ref_frame[0]],
1455                 NULL, 0, 0);
1456
1457             // save motion search result for use in compound prediction
1458             seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1459           }
1460
1461           if (cpi->sf.adaptive_motion_search)
1462             x->pred_mv[mbmi->ref_frame[0]] = *new_mv;
1463
1464           // restore src pointers
1465           mi_buf_restore(x, orig_src, orig_pre);
1466         }
1467
1468         if (has_second_rf) {
1469           if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1470               seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1471             continue;
1472         }
1473
1474         if (has_second_rf && this_mode == NEWMV &&
1475             mbmi->interp_filter == EIGHTTAP) {
1476           // adjust src pointers
1477           mi_buf_shift(x, i);
1478           if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1479             int rate_mv;
1480             joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1481                                 mi_row, mi_col, seg_mvs[i],
1482                                 &rate_mv);
1483             seg_mvs[i][mbmi->ref_frame[0]].as_int =
1484                 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1485             seg_mvs[i][mbmi->ref_frame[1]].as_int =
1486                 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1487           }
1488           // restore src pointers
1489           mi_buf_restore(x, orig_src, orig_pre);
1490         }
1491
1492         bsi->rdstat[i][mode_idx].brate =
1493             set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode],
1494                                  frame_mv, seg_mvs[i], bsi->ref_mv,
1495                                  x->nmvjointcost, x->mvcost);
1496
1497         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1498           bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1499               mode_mv[this_mode][ref].as_int;
1500           if (num_4x4_blocks_wide > 1)
1501             bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1502                 mode_mv[this_mode][ref].as_int;
1503           if (num_4x4_blocks_high > 1)
1504             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1505                 mode_mv[this_mode][ref].as_int;
1506         }
1507
1508         // Trap vectors that reach beyond the UMV borders
1509         if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1510             (has_second_rf &&
1511              mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
1512           continue;
1513
1514         if (filter_idx > 0) {
1515           BEST_SEG_INFO *ref_bsi = bsi_buf;
1516           subpelmv = 0;
1517           have_ref = 1;
1518
1519           for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1520             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1521             have_ref &= mode_mv[this_mode][ref].as_int ==
1522                 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1523           }
1524
1525           if (filter_idx > 1 && !subpelmv && !have_ref) {
1526             ref_bsi = bsi_buf + 1;
1527             have_ref = 1;
1528             for (ref = 0; ref < 1 + has_second_rf; ++ref)
1529               have_ref &= mode_mv[this_mode][ref].as_int ==
1530                   ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1531           }
1532
1533           if (!subpelmv && have_ref &&
1534               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1535             vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1536                        sizeof(SEG_RDSTAT));
1537             if (num_4x4_blocks_wide > 1)
1538               bsi->rdstat[i + 1][mode_idx].eobs =
1539                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
1540             if (num_4x4_blocks_high > 1)
1541               bsi->rdstat[i + 2][mode_idx].eobs =
1542                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
1543
1544             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1545               mode_selected = this_mode;
1546               best_rd = bsi->rdstat[i][mode_idx].brdcost;
1547             }
1548             continue;
1549           }
1550         }
1551
1552         bsi->rdstat[i][mode_idx].brdcost =
1553             encode_inter_mb_segment(cpi, x,
1554                                     bsi->segment_rd - this_segment_rd, i,
1555                                     &bsi->rdstat[i][mode_idx].byrate,
1556                                     &bsi->rdstat[i][mode_idx].bdist,
1557                                     &bsi->rdstat[i][mode_idx].bsse,
1558                                     bsi->rdstat[i][mode_idx].ta,
1559                                     bsi->rdstat[i][mode_idx].tl,
1560                                     mi_row, mi_col);
1561         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1562           bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1563                                             bsi->rdstat[i][mode_idx].brate, 0);
1564           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1565           bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
1566           if (num_4x4_blocks_wide > 1)
1567             bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
1568           if (num_4x4_blocks_high > 1)
1569             bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
1570         }
1571
1572         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1573           mode_selected = this_mode;
1574           best_rd = bsi->rdstat[i][mode_idx].brdcost;
1575         }
1576       } /*for each 4x4 mode*/
1577
1578       if (best_rd == INT64_MAX) {
1579         int iy, midx;
1580         for (iy = i + 1; iy < 4; ++iy)
1581           for (midx = 0; midx < INTER_MODES; ++midx)
1582             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1583         bsi->segment_rd = INT64_MAX;
1584         return INT64_MAX;;
1585       }
1586
1587       mode_idx = INTER_OFFSET(mode_selected);
1588       vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
1589       vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
1590
1591       set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected],
1592                            frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
1593                            x->mvcost);
1594
1595       br += bsi->rdstat[i][mode_idx].brate;
1596       bd += bsi->rdstat[i][mode_idx].bdist;
1597       block_sse += bsi->rdstat[i][mode_idx].bsse;
1598       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
1599       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
1600
1601       if (this_segment_rd > bsi->segment_rd) {
1602         int iy, midx;
1603         for (iy = i + 1; iy < 4; ++iy)
1604           for (midx = 0; midx < INTER_MODES; ++midx)
1605             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1606         bsi->segment_rd = INT64_MAX;
1607         return INT64_MAX;;
1608       }
1609     }
1610   } /* for each label */
1611
1612   bsi->r = br;
1613   bsi->d = bd;
1614   bsi->segment_yrate = segmentyrate;
1615   bsi->segment_rd = this_segment_rd;
1616   bsi->sse = block_sse;
1617
1618   // update the coding decisions
1619   for (k = 0; k < 4; ++k)
1620     bsi->modes[k] = mi->bmi[k].as_mode;
1621
1622   if (bsi->segment_rd > best_rd)
1623     return INT64_MAX;
1624   /* set it to the best */
1625   for (i = 0; i < 4; i++) {
1626     mode_idx = INTER_OFFSET(bsi->modes[i]);
1627     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
1628     if (has_second_ref(mbmi))
1629       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
1630     x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
1631     mi->bmi[i].as_mode = bsi->modes[i];
1632   }
1633
1634   /*
1635    * used to set mbmi->mv.as_int
1636    */
1637   *returntotrate = bsi->r;
1638   *returndistortion = bsi->d;
1639   *returnyrate = bsi->segment_yrate;
1640   *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
1641   *psse = bsi->sse;
1642   mbmi->mode = bsi->modes[3];
1643
1644   return bsi->segment_rd;
1645 }
1646
1647 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
1648                                      const MACROBLOCKD *xd,
1649                                      int segment_id,
1650                                      unsigned int *ref_costs_single,
1651                                      unsigned int *ref_costs_comp,
1652                                      vp9_prob *comp_mode_p) {
1653   int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
1654                                              SEG_LVL_REF_FRAME);
1655   if (seg_ref_active) {
1656     vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
1657     vpx_memset(ref_costs_comp,   0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
1658     *comp_mode_p = 128;
1659   } else {
1660     vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
1661     vp9_prob comp_inter_p = 128;
1662
1663     if (cm->reference_mode == REFERENCE_MODE_SELECT) {
1664       comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
1665       *comp_mode_p = comp_inter_p;
1666     } else {
1667       *comp_mode_p = 128;
1668     }
1669
1670     ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
1671
1672     if (cm->reference_mode != COMPOUND_REFERENCE) {
1673       vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
1674       vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
1675       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1676
1677       if (cm->reference_mode == REFERENCE_MODE_SELECT)
1678         base_cost += vp9_cost_bit(comp_inter_p, 0);
1679
1680       ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
1681           ref_costs_single[ALTREF_FRAME] = base_cost;
1682       ref_costs_single[LAST_FRAME]   += vp9_cost_bit(ref_single_p1, 0);
1683       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1684       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
1685       ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
1686       ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
1687     } else {
1688       ref_costs_single[LAST_FRAME]   = 512;
1689       ref_costs_single[GOLDEN_FRAME] = 512;
1690       ref_costs_single[ALTREF_FRAME] = 512;
1691     }
1692     if (cm->reference_mode != SINGLE_REFERENCE) {
1693       vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
1694       unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
1695
1696       if (cm->reference_mode == REFERENCE_MODE_SELECT)
1697         base_cost += vp9_cost_bit(comp_inter_p, 1);
1698
1699       ref_costs_comp[LAST_FRAME]   = base_cost + vp9_cost_bit(ref_comp_p, 0);
1700       ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
1701     } else {
1702       ref_costs_comp[LAST_FRAME]   = 512;
1703       ref_costs_comp[GOLDEN_FRAME] = 512;
1704     }
1705   }
1706 }
1707
1708 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
1709                          int mode_index,
1710                          int64_t comp_pred_diff[REFERENCE_MODES],
1711                          const int64_t tx_size_diff[TX_MODES],
1712                          int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS],
1713                          int skippable) {
1714   MACROBLOCKD *const xd = &x->e_mbd;
1715
1716   // Take a snapshot of the coding context so it can be
1717   // restored if we decide to encode this way
1718   ctx->skip = x->skip;
1719   ctx->skippable = skippable;
1720   ctx->best_mode_index = mode_index;
1721   ctx->mic = *xd->mi[0];
1722   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
1723   ctx->comp_pred_diff   = (int)comp_pred_diff[COMPOUND_REFERENCE];
1724   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
1725
1726   vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
1727   vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
1728              sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
1729 }
1730
1731 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
1732                                const TileInfo *const tile,
1733                                MV_REFERENCE_FRAME ref_frame,
1734                                BLOCK_SIZE block_size,
1735                                int mi_row, int mi_col,
1736                                int_mv frame_nearest_mv[MAX_REF_FRAMES],
1737                                int_mv frame_near_mv[MAX_REF_FRAMES],
1738                                struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
1739   const VP9_COMMON *cm = &cpi->common;
1740   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
1741   MACROBLOCKD *const xd = &x->e_mbd;
1742   MODE_INFO *const mi = xd->mi[0];
1743   int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
1744   const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
1745
1746   // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
1747   // use the UV scaling factors.
1748   vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
1749
1750   // Gets an initial list of candidate vectors from neighbours and orders them
1751   vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
1752
1753   // Candidate refinement carried out at encoder and decoder
1754   vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
1755                         &frame_nearest_mv[ref_frame],
1756                         &frame_near_mv[ref_frame]);
1757
1758   // Further refinement that is encode side only to test the top few candidates
1759   // in full and choose the best as the centre point for subsequent searches.
1760   // The current implementation doesn't support scaling.
1761   if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
1762     vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
1763                 ref_frame, block_size);
1764 }
1765
1766 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1767                                  BLOCK_SIZE bsize,
1768                                  int mi_row, int mi_col,
1769                                  int_mv *tmp_mv, int *rate_mv) {
1770   MACROBLOCKD *xd = &x->e_mbd;
1771   const VP9_COMMON *cm = &cpi->common;
1772   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1773   struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0, 0}};
1774   int bestsme = INT_MAX;
1775   int step_param;
1776   int sadpb = x->sadperbit16;
1777   MV mvp_full;
1778   int ref = mbmi->ref_frame[0];
1779   MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
1780
1781   int tmp_col_min = x->mv_col_min;
1782   int tmp_col_max = x->mv_col_max;
1783   int tmp_row_min = x->mv_row_min;
1784   int tmp_row_max = x->mv_row_max;
1785   int sad_list[5];
1786
1787   const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
1788                                                                         ref);
1789
1790   MV pred_mv[3];
1791   pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
1792   pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
1793   pred_mv[2] = x->pred_mv[ref];
1794
1795   if (scaled_ref_frame) {
1796     int i;
1797     // Swap out the reference frame for a version that's been scaled to
1798     // match the resolution of the current frame, allowing the existing
1799     // motion search code to be used without additional modifications.
1800     for (i = 0; i < MAX_MB_PLANE; i++)
1801       backup_yv12[i] = xd->plane[i].pre[0];
1802
1803     vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
1804   }
1805
1806   vp9_set_mv_search_range(x, &ref_mv);
1807
1808   // Work out the size of the first step in the mv step search.
1809   // 0 here is maximum length first step. 1 is MAX >> 1 etc.
1810   if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
1811     // Take wtd average of the step_params based on the last frame's
1812     // max mv magnitude and that based on the best ref mvs of the current
1813     // block for the given reference.
1814     step_param = (vp9_init_search_range(x->max_mv_context[ref]) +
1815                     cpi->mv_step_param) / 2;
1816   } else {
1817     step_param = cpi->mv_step_param;
1818   }
1819
1820   if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
1821     int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
1822                                                        b_width_log2(bsize)));
1823     step_param = MAX(step_param, boffset);
1824   }
1825
1826   if (cpi->sf.adaptive_motion_search) {
1827     int bwl = b_width_log2(bsize);
1828     int bhl = b_height_log2(bsize);
1829     int i;
1830     int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
1831
1832     if (tlevel < 5)
1833       step_param += 2;
1834
1835     for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
1836       if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
1837         x->pred_mv[ref].row = 0;
1838         x->pred_mv[ref].col = 0;
1839         tmp_mv->as_int = INVALID_MV;
1840
1841         if (scaled_ref_frame) {
1842           int i;
1843           for (i = 0; i < MAX_MB_PLANE; i++)
1844             xd->plane[i].pre[0] = backup_yv12[i];
1845         }
1846         return;
1847       }
1848     }
1849   }
1850
1851   mvp_full = pred_mv[x->mv_best_ref_index[ref]];
1852
1853   mvp_full.col >>= 3;
1854   mvp_full.row >>= 3;
1855
1856   bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
1857                                   cond_sad_list(cpi, sad_list),
1858                                   &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
1859
1860   x->mv_col_min = tmp_col_min;
1861   x->mv_col_max = tmp_col_max;
1862   x->mv_row_min = tmp_row_min;
1863   x->mv_row_max = tmp_row_max;
1864
1865   if (bestsme < INT_MAX) {
1866     int dis;  /* TODO: use dis in distortion calculation later. */
1867     cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
1868                                  cm->allow_high_precision_mv,
1869                                  x->errorperbit,
1870                                  &cpi->fn_ptr[bsize],
1871                                  cpi->sf.mv.subpel_force_stop,
1872                                  cpi->sf.mv.subpel_iters_per_step,
1873                                  cond_sad_list(cpi, sad_list),
1874                                  x->nmvjointcost, x->mvcost,
1875                                  &dis, &x->pred_sse[ref], NULL, 0, 0);
1876   }
1877   *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
1878                              x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
1879
1880   if (cpi->sf.adaptive_motion_search)
1881     x->pred_mv[ref] = tmp_mv->as_mv;
1882
1883   if (scaled_ref_frame) {
1884     int i;
1885     for (i = 0; i < MAX_MB_PLANE; i++)
1886       xd->plane[i].pre[0] = backup_yv12[i];
1887   }
1888 }
1889
1890 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1891                                 BLOCK_SIZE bsize,
1892                                 int_mv *frame_mv,
1893                                 int mi_row, int mi_col,
1894                                 int_mv single_newmv[MAX_REF_FRAMES],
1895                                 int *rate_mv) {
1896   const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
1897   const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
1898   MACROBLOCKD *xd = &x->e_mbd;
1899   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
1900   const int refs[2] = { mbmi->ref_frame[0],
1901                         mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
1902   int_mv ref_mv[2];
1903   int ite, ref;
1904   // Prediction buffer from second frame.
1905   uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
1906   const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
1907
1908   // Do joint motion search in compound mode to get more accurate mv.
1909   struct buf_2d backup_yv12[2][MAX_MB_PLANE];
1910   struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
1911   int last_besterr[2] = {INT_MAX, INT_MAX};
1912   const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
1913     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
1914     vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
1915   };
1916
1917   for (ref = 0; ref < 2; ++ref) {
1918     ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
1919
1920     if (scaled_ref_frame[ref]) {
1921       int i;
1922       // Swap out the reference frame for a version that's been scaled to
1923       // match the resolution of the current frame, allowing the existing
1924       // motion search code to be used without additional modifications.
1925       for (i = 0; i < MAX_MB_PLANE; i++)
1926         backup_yv12[ref][i] = xd->plane[i].pre[ref];
1927       vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
1928                            NULL);
1929     }
1930
1931     frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
1932   }
1933
1934   // Allow joint search multiple times iteratively for each ref frame
1935   // and break out the search loop if it couldn't find better mv.
1936   for (ite = 0; ite < 4; ite++) {
1937     struct buf_2d ref_yv12[2];
1938     int bestsme = INT_MAX;
1939     int sadpb = x->sadperbit16;
1940     MV tmp_mv;
1941     int search_range = 3;
1942
1943     int tmp_col_min = x->mv_col_min;
1944     int tmp_col_max = x->mv_col_max;
1945     int tmp_row_min = x->mv_row_min;
1946     int tmp_row_max = x->mv_row_max;
1947     int id = ite % 2;
1948
1949     // Initialized here because of compiler problem in Visual Studio.
1950     ref_yv12[0] = xd->plane[0].pre[0];
1951     ref_yv12[1] = xd->plane[0].pre[1];
1952
1953     // Get pred block from second frame.
1954     vp9_build_inter_predictor(ref_yv12[!id].buf,
1955                               ref_yv12[!id].stride,
1956                               second_pred, pw,
1957                               &frame_mv[refs[!id]].as_mv,
1958                               &xd->block_refs[!id]->sf,
1959                               pw, ph, 0,
1960                               kernel, MV_PRECISION_Q3,
1961                               mi_col * MI_SIZE, mi_row * MI_SIZE);
1962
1963     // Compound motion search on first ref frame.
1964     if (id)
1965       xd->plane[0].pre[0] = ref_yv12[id];
1966     vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
1967
1968     // Use mv result from single mode as mvp.
1969     tmp_mv = frame_mv[refs[id]].as_mv;
1970
1971     tmp_mv.col >>= 3;
1972     tmp_mv.row >>= 3;
1973
1974     // Small-range full-pixel motion search
1975     bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
1976                                        search_range,
1977                                        &cpi->fn_ptr[bsize],
1978                                        &ref_mv[id].as_mv, second_pred);
1979     if (bestsme < INT_MAX)
1980       bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
1981                                       second_pred, &cpi->fn_ptr[bsize], 1);
1982
1983     x->mv_col_min = tmp_col_min;
1984     x->mv_col_max = tmp_col_max;
1985     x->mv_row_min = tmp_row_min;
1986     x->mv_row_max = tmp_row_max;
1987
1988     if (bestsme < INT_MAX) {
1989       int dis; /* TODO: use dis in distortion calculation later. */
1990       unsigned int sse;
1991       bestsme = cpi->find_fractional_mv_step(
1992           x, &tmp_mv,
1993           &ref_mv[id].as_mv,
1994           cpi->common.allow_high_precision_mv,
1995           x->errorperbit,
1996           &cpi->fn_ptr[bsize],
1997           0, cpi->sf.mv.subpel_iters_per_step,
1998           NULL,
1999           x->nmvjointcost, x->mvcost,
2000           &dis, &sse, second_pred,
2001           pw, ph);
2002     }
2003
2004     if (id)
2005       xd->plane[0].pre[0] = scaled_first_yv12;
2006
2007     if (bestsme < last_besterr[id]) {
2008       frame_mv[refs[id]].as_mv = tmp_mv;
2009       last_besterr[id] = bestsme;
2010     } else {
2011       break;
2012     }
2013   }
2014
2015   *rate_mv = 0;
2016
2017   for (ref = 0; ref < 2; ++ref) {
2018     if (scaled_ref_frame[ref]) {
2019       // restore the predictor
2020       int i;
2021       for (i = 0; i < MAX_MB_PLANE; i++)
2022         xd->plane[i].pre[ref] = backup_yv12[ref][i];
2023     }
2024
2025     *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2026                                 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2027                                 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2028   }
2029
2030   vpx_free(second_pred);
2031 }
2032
2033 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2034                                    uint8_t *orig_dst[MAX_MB_PLANE],
2035                                    int orig_dst_stride[MAX_MB_PLANE]) {
2036   int i;
2037   for (i = 0; i < MAX_MB_PLANE; i++) {
2038     xd->plane[i].dst.buf = orig_dst[i];
2039     xd->plane[i].dst.stride = orig_dst_stride[i];
2040   }
2041 }
2042
2043 static void rd_encode_breakout_test(VP9_COMP *cpi, MACROBLOCK *x,
2044                                     BLOCK_SIZE bsize, int *rate2,
2045                                     int64_t *distortion, int64_t *distortion_uv,
2046                                     int *disable_skip) {
2047   VP9_COMMON *cm = &cpi->common;
2048   MACROBLOCKD *xd = &x->e_mbd;
2049   const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2050   const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2051   unsigned int var, sse;
2052   // Skipping threshold for ac.
2053   unsigned int thresh_ac;
2054   // Skipping threshold for dc
2055   unsigned int thresh_dc;
2056
2057   var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2058                                xd->plane[0].dst.buf,
2059                                xd->plane[0].dst.stride, &sse);
2060
2061   if (x->encode_breakout > 0) {
2062     // Set a maximum for threshold to avoid big PSNR loss in low bitrate
2063     // case. Use extreme low threshold for static frames to limit skipping.
2064     const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2065                                      ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2066     // The encode_breakout input
2067     const unsigned int min_thresh =
2068         MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2069
2070     // Calculate threshold according to dequant value.
2071     thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2072     thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2073
2074     // Adjust threshold according to partition size.
2075     thresh_ac >>= 8 - (b_width_log2(bsize) +
2076         b_height_log2(bsize));
2077     thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2078   } else {
2079     thresh_ac = 0;
2080     thresh_dc = 0;
2081   }
2082
2083   // Y skipping condition checking
2084   if (sse < thresh_ac || sse == 0) {
2085     // dc skipping checking
2086     if ((sse - var) < thresh_dc || sse == var) {
2087       unsigned int sse_u, sse_v;
2088       unsigned int var_u, var_v;
2089
2090       var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2091                                       x->plane[1].src.stride,
2092                                       xd->plane[1].dst.buf,
2093                                       xd->plane[1].dst.stride, &sse_u);
2094
2095       // U skipping condition checking
2096       if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2097           (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2098         var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2099                                         x->plane[2].src.stride,
2100                                         xd->plane[2].dst.buf,
2101                                         xd->plane[2].dst.stride, &sse_v);
2102
2103         // V skipping condition checking
2104         if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2105             (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2106           x->skip = 1;
2107
2108           // The cost of skip bit needs to be added.
2109           *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2110
2111           // Scaling factor for SSE from spatial domain to frequency domain
2112           // is 16. Adjust distortion accordingly.
2113           *distortion_uv = (sse_u + sse_v) << 4;
2114           *distortion = (sse << 4) + *distortion_uv;
2115
2116           *disable_skip = 1;
2117         }
2118       }
2119     }
2120   }
2121 }
2122
2123 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2124                                  BLOCK_SIZE bsize,
2125                                  int64_t txfm_cache[],
2126                                  int *rate2, int64_t *distortion,
2127                                  int *skippable,
2128                                  int *rate_y, int64_t *distortion_y,
2129                                  int *rate_uv, int64_t *distortion_uv,
2130                                  int *disable_skip,
2131                                  int_mv (*mode_mv)[MAX_REF_FRAMES],
2132                                  int mi_row, int mi_col,
2133                                  int_mv single_newmv[MAX_REF_FRAMES],
2134                                  INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
2135                                  int (*single_skippable)[MAX_REF_FRAMES],
2136                                  int64_t *psse,
2137                                  const int64_t ref_best_rd) {
2138   VP9_COMMON *cm = &cpi->common;
2139   RD_OPT *rd_opt = &cpi->rd;
2140   MACROBLOCKD *xd = &x->e_mbd;
2141   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2142   const int is_comp_pred = has_second_ref(mbmi);
2143   const int this_mode = mbmi->mode;
2144   int_mv *frame_mv = mode_mv[this_mode];
2145   int i;
2146   int refs[2] = { mbmi->ref_frame[0],
2147     (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2148   int_mv cur_mv[2];
2149   int64_t this_rd = 0;
2150   DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2151   int pred_exists = 0;
2152   int intpel_mv;
2153   int64_t rd, tmp_rd, best_rd = INT64_MAX;
2154   int best_needs_copy = 0;
2155   uint8_t *orig_dst[MAX_MB_PLANE];
2156   int orig_dst_stride[MAX_MB_PLANE];
2157   int rs = 0;
2158   INTERP_FILTER best_filter = SWITCHABLE;
2159   uint8_t skip_txfm[MAX_MB_PLANE << 2] = {0};
2160   int64_t bsse[MAX_MB_PLANE << 2] = {0};
2161
2162   int bsl = mi_width_log2_lookup[bsize];
2163   int pred_filter_search = cpi->sf.cb_pred_filter_search ?
2164       (((mi_row + mi_col) >> bsl) +
2165        get_chessboard_index(cm->current_video_frame)) & 0x1 : 0;
2166
2167   if (pred_filter_search) {
2168     INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
2169     if (xd->up_available)
2170       af = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
2171     if (xd->left_available)
2172       lf = xd->mi[-1]->mbmi.interp_filter;
2173
2174     if ((this_mode != NEWMV) || (af == lf))
2175       best_filter = af;
2176   }
2177
2178   if (is_comp_pred) {
2179     if (frame_mv[refs[0]].as_int == INVALID_MV ||
2180         frame_mv[refs[1]].as_int == INVALID_MV)
2181       return INT64_MAX;
2182
2183     if (cpi->sf.adaptive_mode_search) {
2184       if (single_filter[this_mode][refs[0]] ==
2185           single_filter[this_mode][refs[1]])
2186         best_filter = single_filter[this_mode][refs[0]];
2187     }
2188   }
2189
2190   if (this_mode == NEWMV) {
2191     int rate_mv;
2192     if (is_comp_pred) {
2193       // Initialize mv using single prediction mode result.
2194       frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2195       frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2196
2197       if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2198         joint_motion_search(cpi, x, bsize, frame_mv,
2199                             mi_row, mi_col, single_newmv, &rate_mv);
2200       } else {
2201         rate_mv  = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2202                                    &mbmi->ref_mvs[refs[0]][0].as_mv,
2203                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2204         rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2205                                    &mbmi->ref_mvs[refs[1]][0].as_mv,
2206                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2207       }
2208       *rate2 += rate_mv;
2209     } else {
2210       int_mv tmp_mv;
2211       single_motion_search(cpi, x, bsize, mi_row, mi_col,
2212                            &tmp_mv, &rate_mv);
2213       if (tmp_mv.as_int == INVALID_MV)
2214         return INT64_MAX;
2215       *rate2 += rate_mv;
2216       frame_mv[refs[0]].as_int =
2217           xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2218       single_newmv[refs[0]].as_int = tmp_mv.as_int;
2219     }
2220   }
2221
2222   for (i = 0; i < is_comp_pred + 1; ++i) {
2223     cur_mv[i] = frame_mv[refs[i]];
2224     // Clip "next_nearest" so that it does not extend to far out of image
2225     if (this_mode != NEWMV)
2226       clamp_mv2(&cur_mv[i].as_mv, xd);
2227
2228     if (mv_check_bounds(x, &cur_mv[i].as_mv))
2229       return INT64_MAX;
2230     mbmi->mv[i].as_int = cur_mv[i].as_int;
2231   }
2232
2233   // do first prediction into the destination buffer. Do the next
2234   // prediction into a temporary buffer. Then keep track of which one
2235   // of these currently holds the best predictor, and use the other
2236   // one for future predictions. In the end, copy from tmp_buf to
2237   // dst if necessary.
2238   for (i = 0; i < MAX_MB_PLANE; i++) {
2239     orig_dst[i] = xd->plane[i].dst.buf;
2240     orig_dst_stride[i] = xd->plane[i].dst.stride;
2241   }
2242
2243   /* We don't include the cost of the second reference here, because there
2244    * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2245    * words if you present them in that order, the second one is always known
2246    * if the first is known */
2247   *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2248
2249   if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
2250       mbmi->mode != NEARESTMV)
2251     return INT64_MAX;
2252
2253   pred_exists = 0;
2254   // Are all MVs integer pel for Y and UV
2255   intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2256   if (is_comp_pred)
2257     intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2258
2259   // Search for best switchable filter by checking the variance of
2260   // pred error irrespective of whether the filter will be used
2261   rd_opt->mask_filter = 0;
2262   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2263     rd_opt->filter_cache[i] = INT64_MAX;
2264
2265   if (cm->interp_filter != BILINEAR) {
2266     if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2267       best_filter = EIGHTTAP;
2268     } else if (best_filter == SWITCHABLE) {
2269       int newbest;
2270       int tmp_rate_sum = 0;
2271       int64_t tmp_dist_sum = 0;
2272
2273       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2274         int j;
2275         int64_t rs_rd;
2276         mbmi->interp_filter = i;
2277         rs = vp9_get_switchable_rate(cpi);
2278         rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
2279
2280         if (i > 0 && intpel_mv) {
2281           rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2282           rd_opt->filter_cache[i] = rd;
2283           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2284               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2285           if (cm->interp_filter == SWITCHABLE)
2286             rd += rs_rd;
2287           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2288         } else {
2289           int rate_sum = 0;
2290           int64_t dist_sum = 0;
2291           if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
2292               (cpi->sf.interp_filter_search_mask & (1 << i))) {
2293             rate_sum = INT_MAX;
2294             dist_sum = INT64_MAX;
2295             continue;
2296           }
2297
2298           if ((cm->interp_filter == SWITCHABLE &&
2299                (!i || best_needs_copy)) ||
2300               (cm->interp_filter != SWITCHABLE &&
2301                (cm->interp_filter == mbmi->interp_filter ||
2302                 (i == 0 && intpel_mv)))) {
2303             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2304           } else {
2305             for (j = 0; j < MAX_MB_PLANE; j++) {
2306               xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2307               xd->plane[j].dst.stride = 64;
2308             }
2309           }
2310           vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2311           model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2312
2313           rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2314           rd_opt->filter_cache[i] = rd;
2315           rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2316               MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2317           if (cm->interp_filter == SWITCHABLE)
2318             rd += rs_rd;
2319           rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2320
2321           if (i == 0 && intpel_mv) {
2322             tmp_rate_sum = rate_sum;
2323             tmp_dist_sum = dist_sum;
2324           }
2325         }
2326
2327         if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2328           if (rd / 2 > ref_best_rd) {
2329             restore_dst_buf(xd, orig_dst, orig_dst_stride);
2330             return INT64_MAX;
2331           }
2332         }
2333         newbest = i == 0 || rd < best_rd;
2334
2335         if (newbest) {
2336           best_rd = rd;
2337           best_filter = mbmi->interp_filter;
2338           if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2339             best_needs_copy = !best_needs_copy;
2340           vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2341           vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2342         }
2343
2344         if ((cm->interp_filter == SWITCHABLE && newbest) ||
2345             (cm->interp_filter != SWITCHABLE &&
2346              cm->interp_filter == mbmi->interp_filter)) {
2347           pred_exists = 1;
2348           tmp_rd = best_rd;
2349         }
2350       }
2351       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2352     }
2353   }
2354   // Set the appropriate filter
2355   mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2356       cm->interp_filter : best_filter;
2357   rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2358
2359   if (pred_exists) {
2360     if (best_needs_copy) {
2361       // again temporarily set the buffers to local memory to prevent a memcpy
2362       for (i = 0; i < MAX_MB_PLANE; i++) {
2363         xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2364         xd->plane[i].dst.stride = 64;
2365       }
2366     }
2367     rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
2368   } else {
2369     int tmp_rate;
2370     int64_t tmp_dist;
2371     // Handles the special case when a filter that is not in the
2372     // switchable list (ex. bilinear) is indicated at the frame level, or
2373     // skip condition holds.
2374     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2375     model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2376     rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2377     vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
2378     vpx_memcpy(bsse, x->bsse, sizeof(bsse));
2379   }
2380
2381   if (!is_comp_pred)
2382     single_filter[this_mode][refs[0]] = mbmi->interp_filter;
2383
2384   if (cpi->sf.adaptive_mode_search)
2385     if (is_comp_pred)
2386       if (single_skippable[this_mode][refs[0]] &&
2387           single_skippable[this_mode][refs[1]])
2388         vpx_memset(skip_txfm, 1, sizeof(skip_txfm));
2389
2390   if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2391     // if current pred_error modeled rd is substantially more than the best
2392     // so far, do not bother doing full rd
2393     if (rd / 2 > ref_best_rd) {
2394       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2395       return INT64_MAX;
2396     }
2397   }
2398
2399   if (cm->interp_filter == SWITCHABLE)
2400     *rate2 += rs;
2401
2402   if (!is_comp_pred) {
2403     if (cpi->allow_encode_breakout)
2404       rd_encode_breakout_test(cpi, x, bsize, rate2, distortion, distortion_uv,
2405                               disable_skip);
2406   }
2407
2408   vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
2409   vpx_memcpy(x->bsse, bsse, sizeof(bsse));
2410
2411   if (!x->skip) {
2412     int skippable_y, skippable_uv;
2413     int64_t sseuv = INT64_MAX;
2414     int64_t rdcosty = INT64_MAX;
2415
2416     // Y cost and distortion
2417     vp9_subtract_plane(x, bsize, 0);
2418     super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2419                     bsize, txfm_cache, ref_best_rd);
2420
2421     if (*rate_y == INT_MAX) {
2422       *rate2 = INT_MAX;
2423       *distortion = INT64_MAX;
2424       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2425       return INT64_MAX;
2426     }
2427
2428     *rate2 += *rate_y;
2429     *distortion += *distortion_y;
2430
2431     rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2432     rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2433
2434     super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2435                      bsize, ref_best_rd - rdcosty);
2436     if (*rate_uv == INT_MAX) {
2437       *rate2 = INT_MAX;
2438       *distortion = INT64_MAX;
2439       restore_dst_buf(xd, orig_dst, orig_dst_stride);
2440       return INT64_MAX;
2441     }
2442
2443     *psse += sseuv;
2444     *rate2 += *rate_uv;
2445     *distortion += *distortion_uv;
2446     *skippable = skippable_y && skippable_uv;
2447   }
2448
2449   if (!is_comp_pred)
2450     single_skippable[this_mode][refs[0]] = *skippable;
2451
2452   restore_dst_buf(xd, orig_dst, orig_dst_stride);
2453   return this_rd;  // if 0, this will be re-calculated by caller
2454 }
2455
2456 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2457                                int *returnrate, int64_t *returndist,
2458                                BLOCK_SIZE bsize,
2459                                PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2460   VP9_COMMON *const cm = &cpi->common;
2461   MACROBLOCKD *const xd = &x->e_mbd;
2462   struct macroblockd_plane *const pd = xd->plane;
2463   int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2464   int y_skip = 0, uv_skip = 0;
2465   int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2466   TX_SIZE max_uv_tx_size;
2467   x->skip_encode = 0;
2468   ctx->skip = 0;
2469   xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
2470
2471   if (bsize >= BLOCK_8X8) {
2472     if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2473                                &dist_y, &y_skip, bsize, tx_cache,
2474                                best_rd) >= best_rd) {
2475       *returnrate = INT_MAX;
2476       return;
2477     }
2478     max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
2479                                          pd[1].subsampling_x,
2480                                          pd[1].subsampling_y);
2481     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2482                             &dist_uv, &uv_skip, bsize, max_uv_tx_size);
2483   } else {
2484     y_skip = 0;
2485     if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2486                                      &dist_y, best_rd) >= best_rd) {
2487       *returnrate = INT_MAX;
2488       return;
2489     }
2490     max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize,
2491                                          pd[1].subsampling_x,
2492                                          pd[1].subsampling_y);
2493     rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2494                             &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
2495   }
2496
2497   if (y_skip && uv_skip) {
2498     *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2499                   vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2500     *returndist = dist_y + dist_uv;
2501     vp9_zero(ctx->tx_rd_diff);
2502   } else {
2503     int i;
2504     *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2505     *returndist = dist_y + dist_uv;
2506     if (cpi->sf.tx_size_search_method == USE_FULL_RD)
2507       for (i = 0; i < TX_MODES; i++) {
2508         if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
2509           ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
2510         else
2511           ctx->tx_rd_diff[i] = 0;
2512       }
2513   }
2514
2515   ctx->mic = *xd->mi[0];
2516 }
2517
2518 // Updating rd_thresh_freq_fact[] here means that the different
2519 // partition/block sizes are handled independently based on the best
2520 // choice for the current partition. It may well be better to keep a scaled
2521 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
2522 // combination that wins out.
2523 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
2524                                   int best_mode_index) {
2525   if (cpi->sf.adaptive_rd_thresh > 0) {
2526     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
2527     int mode;
2528     for (mode = 0; mode < top_mode; ++mode) {
2529       int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
2530
2531       if (mode == best_mode_index) {
2532         *fact -= (*fact >> 3);
2533       } else {
2534         *fact = MIN(*fact + RD_THRESH_INC,
2535                     cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
2536       }
2537     }
2538   }
2539 }
2540
2541 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2542                                   const TileInfo *const tile,
2543                                   int mi_row, int mi_col,
2544                                   int *returnrate,
2545                                   int64_t *returndistortion,
2546                                   BLOCK_SIZE bsize,
2547                                   PICK_MODE_CONTEXT *ctx,
2548                                   int64_t best_rd_so_far) {
2549   VP9_COMMON *const cm = &cpi->common;
2550   RD_OPT *const rd_opt = &cpi->rd;
2551   MACROBLOCKD *const xd = &x->e_mbd;
2552   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2553   const struct segmentation *const seg = &cm->seg;
2554   struct macroblockd_plane *const pd = xd->plane;
2555   PREDICTION_MODE this_mode;
2556   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
2557   unsigned char segment_id = mbmi->segment_id;
2558   int comp_pred, i, k;
2559   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
2560   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
2561   int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
2562   INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
2563   int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
2564   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
2565                                     VP9_ALT_FLAG };
2566   int64_t best_rd = best_rd_so_far;
2567   int64_t best_tx_rd[TX_MODES];
2568   int64_t best_tx_diff[TX_MODES];
2569   int64_t best_pred_diff[REFERENCE_MODES];
2570   int64_t best_pred_rd[REFERENCE_MODES];
2571   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
2572   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
2573   MB_MODE_INFO best_mbmode;
2574   int best_mode_skippable = 0;
2575   int mode_index, best_mode_index = -1;
2576   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
2577   vp9_prob comp_mode_p;
2578   int64_t best_intra_rd = INT64_MAX;
2579   unsigned int best_pred_sse = UINT_MAX;
2580   PREDICTION_MODE best_intra_mode = DC_PRED;
2581   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
2582   int64_t dist_uv[TX_SIZES];
2583   int skip_uv[TX_SIZES];
2584   PREDICTION_MODE mode_uv[TX_SIZES];
2585   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
2586   int best_skip2 = 0;
2587   uint8_t ref_frame_skip_mask[2] = { 0 };
2588   uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
2589   int mode_skip_start = cpi->sf.mode_skip_start + 1;
2590   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
2591   const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
2592   const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
2593   vp9_zero(best_mbmode);
2594   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
2595
2596   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
2597                            &comp_mode_p);
2598
2599   for (i = 0; i < REFERENCE_MODES; ++i)
2600     best_pred_rd[i] = INT64_MAX;
2601   for (i = 0; i < TX_MODES; i++)
2602     best_tx_rd[i] = INT64_MAX;
2603   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2604     best_filter_rd[i] = INT64_MAX;
2605   for (i = 0; i < TX_SIZES; i++)
2606     rate_uv_intra[i] = INT_MAX;
2607   for (i = 0; i < MAX_REF_FRAMES; ++i)
2608     x->pred_sse[i] = INT_MAX;
2609   for (i = 0; i < MB_MODE_COUNT; ++i) {
2610     for (k = 0; k < MAX_REF_FRAMES; ++k) {
2611       single_inter_filter[i][k] = SWITCHABLE;
2612       single_skippable[i][k] = 0;
2613     }
2614   }
2615
2616   *returnrate = INT_MAX;
2617
2618   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2619     x->pred_mv_sad[ref_frame] = INT_MAX;
2620     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
2621       setup_buffer_inter(cpi, x, tile, ref_frame, bsize, mi_row, mi_col,
2622                          frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
2623     }
2624     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
2625     frame_mv[ZEROMV][ref_frame].as_int = 0;
2626   }
2627
2628   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
2629     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
2630       // Skip checking missing references in both single and compound reference
2631       // modes. Note that a mode will be skipped iff both reference frames
2632       // are masked out.
2633       ref_frame_skip_mask[0] |= (1 << ref_frame);
2634       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2635     } else if (cpi->sf.reference_masking) {
2636       for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
2637         // Skip fixed mv modes for poor references
2638         if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
2639           mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
2640           break;
2641         }
2642       }
2643     }
2644     // If the segment reference frame feature is enabled....
2645     // then do nothing if the current ref frame is not allowed..
2646     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
2647         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
2648       ref_frame_skip_mask[0] |= (1 << ref_frame);
2649       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2650     }
2651   }
2652
2653   // Disable this drop out case if the ref frame
2654   // segment level feature is enabled for this segment. This is to
2655   // prevent the possibility that we end up unable to pick any mode.
2656   if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
2657     // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
2658     // unless ARNR filtering is enabled in which case we want
2659     // an unfiltered alternative. We allow near/nearest as well
2660     // because they may result in zero-zero MVs but be cheaper.
2661     if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
2662       ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
2663       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2664       mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
2665       if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
2666         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
2667       if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
2668         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
2669     }
2670   }
2671
2672   if (cpi->rc.is_src_frame_alt_ref) {
2673     if (cpi->sf.alt_ref_search_fp) {
2674       mode_skip_mask[ALTREF_FRAME] = 0;
2675       ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
2676       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
2677     }
2678   }
2679
2680   if (bsize > cpi->sf.max_intra_bsize) {
2681     ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
2682     ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
2683   }
2684
2685   mode_skip_mask[INTRA_FRAME] |=
2686       ~(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
2687
2688   for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
2689     int mode_excluded = 0;
2690     int64_t this_rd = INT64_MAX;
2691     int disable_skip = 0;
2692     int compmode_cost = 0;
2693     int rate2 = 0, rate_y = 0, rate_uv = 0;
2694     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
2695     int skippable = 0;
2696     int64_t tx_cache[TX_MODES];
2697     int i;
2698     int this_skip2 = 0;
2699     int64_t total_sse = INT64_MAX;
2700     int early_term = 0;
2701
2702     this_mode = vp9_mode_order[mode_index].mode;
2703     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
2704     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
2705
2706     // Look at the reference frame of the best mode so far and set the
2707     // skip mask to look at a subset of the remaining modes.
2708     if (mode_index == mode_skip_start && best_mode_index >= 0) {
2709       switch (best_mbmode.ref_frame[0]) {
2710         case INTRA_FRAME:
2711           break;
2712         case LAST_FRAME:
2713           ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
2714           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2715           break;
2716         case GOLDEN_FRAME:
2717           ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
2718           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
2719           break;
2720         case ALTREF_FRAME:
2721           ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK;
2722           break;
2723         case NONE:
2724         case MAX_REF_FRAMES:
2725           assert(0 && "Invalid Reference frame");
2726           break;
2727       }
2728     }
2729
2730     if (ref_frame_skip_mask[0] & (1 << ref_frame) &&
2731         ref_frame_skip_mask[1] & (1 << MAX(0, second_ref_frame)))
2732       continue;
2733
2734     if (mode_skip_mask[ref_frame] & (1 << this_mode))
2735       continue;
2736
2737     // Test best rd so far against threshold for trying this mode.
2738     if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
2739                             rd_thresh_freq_fact[mode_index]))
2740       continue;
2741
2742     if (cpi->sf.motion_field_mode_search) {
2743       const int mi_width  = MIN(num_8x8_blocks_wide_lookup[bsize],
2744                                 tile->mi_col_end - mi_col);
2745       const int mi_height = MIN(num_8x8_blocks_high_lookup[bsize],
2746                                 tile->mi_row_end - mi_row);
2747       const int bsl = mi_width_log2(bsize);
2748       int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl)
2749           + get_chessboard_index(cm->current_video_frame)) & 0x1;
2750       MB_MODE_INFO *ref_mbmi;
2751       int const_motion = 1;
2752       int skip_ref_frame = !cb_partition_search_ctrl;
2753       MV_REFERENCE_FRAME rf = NONE;
2754       int_mv ref_mv;
2755       ref_mv.as_int = INVALID_MV;
2756
2757       if ((mi_row - 1) >= tile->mi_row_start) {
2758         ref_mv = xd->mi[-xd->mi_stride]->mbmi.mv[0];
2759         rf = xd->mi[-xd->mi_stride]->mbmi.ref_frame[0];
2760         for (i = 0; i < mi_width; ++i) {
2761           ref_mbmi = &xd->mi[-xd->mi_stride + i]->mbmi;
2762           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
2763                           (ref_frame == ref_mbmi->ref_frame[0]);
2764           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
2765         }
2766       }
2767
2768       if ((mi_col - 1) >= tile->mi_col_start) {
2769         if (ref_mv.as_int == INVALID_MV)
2770           ref_mv = xd->mi[-1]->mbmi.mv[0];
2771         if (rf == NONE)
2772           rf = xd->mi[-1]->mbmi.ref_frame[0];
2773         for (i = 0; i < mi_height; ++i) {
2774           ref_mbmi = &xd->mi[i * xd->mi_stride - 1]->mbmi;
2775           const_motion &= (ref_mv.as_int == ref_mbmi->mv[0].as_int) &&
2776                           (ref_frame == ref_mbmi->ref_frame[0]);
2777           skip_ref_frame &= (rf == ref_mbmi->ref_frame[0]);
2778         }
2779       }
2780
2781       if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
2782         if (rf > INTRA_FRAME)
2783           if (ref_frame != rf)
2784             continue;
2785
2786       if (const_motion)
2787         if (this_mode == NEARMV || this_mode == ZEROMV)
2788           continue;
2789     }
2790
2791     comp_pred = second_ref_frame > INTRA_FRAME;
2792     if (comp_pred) {
2793       if (!cm->allow_comp_inter_inter)
2794         continue;
2795
2796       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
2797           best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
2798         continue;
2799
2800       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
2801     } else {
2802       if (ref_frame != INTRA_FRAME)
2803         mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
2804     }
2805
2806     if (ref_frame == INTRA_FRAME) {
2807       if (cpi->sf.adaptive_mode_search)
2808         if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
2809           continue;
2810
2811       if (this_mode != DC_PRED) {
2812         // Disable intra modes other than DC_PRED for blocks with low variance
2813         // Threshold for intra skipping based on source variance
2814         // TODO(debargha): Specialize the threshold for super block sizes
2815         const unsigned int skip_intra_var_thresh = 64;
2816         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
2817             x->source_variance < skip_intra_var_thresh)
2818           continue;
2819         // Only search the oblique modes if the best so far is
2820         // one of the neighboring directional modes
2821         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
2822             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
2823           if (best_mode_index >= 0 &&
2824               best_mbmode.ref_frame[0] > INTRA_FRAME)
2825             continue;
2826         }
2827         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
2828           if (conditional_skipintra(this_mode, best_intra_mode))
2829               continue;
2830         }
2831       }
2832     } else {
2833       const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
2834       if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
2835                               this_mode, ref_frames))
2836         continue;
2837     }
2838
2839     mbmi->mode = this_mode;
2840     mbmi->uv_mode = DC_PRED;
2841     mbmi->ref_frame[0] = ref_frame;
2842     mbmi->ref_frame[1] = second_ref_frame;
2843     // Evaluate all sub-pel filters irrespective of whether we can use
2844     // them for this frame.
2845     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
2846                                                           : cm->interp_filter;
2847     mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
2848
2849     x->skip = 0;
2850     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
2851
2852     // Select prediction reference frames.
2853     for (i = 0; i < MAX_MB_PLANE; i++) {
2854       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
2855       if (comp_pred)
2856         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
2857     }
2858
2859     for (i = 0; i < TX_MODES; ++i)
2860       tx_cache[i] = INT64_MAX;
2861
2862     if (ref_frame == INTRA_FRAME) {
2863       TX_SIZE uv_tx;
2864       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable,
2865                       NULL, bsize, tx_cache, best_rd);
2866
2867       if (rate_y == INT_MAX)
2868         continue;
2869
2870       uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd[1].subsampling_x,
2871                                   pd[1].subsampling_y);
2872       if (rate_uv_intra[uv_tx] == INT_MAX) {
2873         choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
2874                              &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
2875                              &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
2876       }
2877
2878       rate_uv = rate_uv_tokenonly[uv_tx];
2879       distortion_uv = dist_uv[uv_tx];
2880       skippable = skippable && skip_uv[uv_tx];
2881       mbmi->uv_mode = mode_uv[uv_tx];
2882
2883       rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
2884       if (this_mode != DC_PRED && this_mode != TM_PRED)
2885         rate2 += intra_cost_penalty;
2886       distortion2 = distortion_y + distortion_uv;
2887     } else {
2888       this_rd = handle_inter_mode(cpi, x, bsize,
2889                                   tx_cache,
2890                                   &rate2, &distortion2, &skippable,
2891                                   &rate_y, &distortion_y,
2892                                   &rate_uv, &distortion_uv,
2893                                   &disable_skip, frame_mv,
2894                                   mi_row, mi_col,
2895                                   single_newmv, single_inter_filter,
2896                                   single_skippable, &total_sse, best_rd);
2897       if (this_rd == INT64_MAX)
2898         continue;
2899
2900       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
2901
2902       if (cm->reference_mode == REFERENCE_MODE_SELECT)
2903         rate2 += compmode_cost;
2904     }
2905
2906     // Estimate the reference frame signaling cost and add it
2907     // to the rolling cost variable.
2908     if (comp_pred) {
2909       rate2 += ref_costs_comp[ref_frame];
2910     } else {
2911       rate2 += ref_costs_single[ref_frame];
2912     }
2913
2914     if (!disable_skip) {
2915       if (skippable) {
2916         vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
2917
2918         // Back out the coefficient coding costs
2919         rate2 -= (rate_y + rate_uv);
2920         // for best yrd calculation
2921         rate_uv = 0;
2922
2923         // Cost the skip mb case
2924         if (skip_prob) {
2925           int prob_skip_cost = vp9_cost_bit(skip_prob, 1);
2926           rate2 += prob_skip_cost;
2927         }
2928       } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
2929         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
2930             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
2931           // Add in the cost of the no skip flag.
2932           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2933         } else {
2934           // FIXME(rbultje) make this work for splitmv also
2935           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2936           distortion2 = total_sse;
2937           assert(total_sse >= 0);
2938           rate2 -= (rate_y + rate_uv);
2939           rate_y = 0;
2940           rate_uv = 0;
2941           this_skip2 = 1;
2942         }
2943       } else {
2944         // Add in the cost of the no skip flag.
2945         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2946       }
2947
2948       // Calculate the final RD estimate for this mode.
2949       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
2950     }
2951
2952     if (ref_frame == INTRA_FRAME) {
2953     // Keep record of best intra rd
2954       if (this_rd < best_intra_rd) {
2955         best_intra_rd = this_rd;
2956         best_intra_mode = mbmi->mode;
2957       }
2958     }
2959
2960     if (!disable_skip && ref_frame == INTRA_FRAME) {
2961       for (i = 0; i < REFERENCE_MODES; ++i)
2962         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
2963       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
2964         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
2965     }
2966
2967     // Did this mode help.. i.e. is it the new best mode
2968     if (this_rd < best_rd || x->skip) {
2969       int max_plane = MAX_MB_PLANE;
2970       if (!mode_excluded) {
2971         // Note index of best mode so far
2972         best_mode_index = mode_index;
2973
2974         if (ref_frame == INTRA_FRAME) {
2975           /* required for left and above block mv */
2976           mbmi->mv[0].as_int = 0;
2977           max_plane = 1;
2978         } else {
2979           best_pred_sse = x->pred_sse[ref_frame];
2980         }
2981
2982         *returnrate = rate2;
2983         *returndistortion = distortion2;
2984         best_rd = this_rd;
2985         best_mbmode = *mbmi;
2986         best_skip2 = this_skip2;
2987         best_mode_skippable = skippable;
2988
2989         if (!x->select_tx_size)
2990           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
2991         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
2992                    sizeof(uint8_t) * ctx->num_4x4_blk);
2993
2994         // TODO(debargha): enhance this test with a better distortion prediction
2995         // based on qp, activity mask and history
2996         if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
2997             (mode_index > MIN_EARLY_TERM_INDEX)) {
2998           const int qstep = xd->plane[0].dequant[1];
2999           // TODO(debargha): Enhance this by specializing for each mode_index
3000           int scale = 4;
3001           if (x->source_variance < UINT_MAX) {
3002             const int var_adjust = (x->source_variance < 16);
3003             scale -= var_adjust;
3004           }
3005           if (ref_frame > INTRA_FRAME &&
3006               distortion2 * scale < qstep * qstep) {
3007             early_term = 1;
3008           }
3009         }
3010       }
3011     }
3012
3013     /* keep record of best compound/single-only prediction */
3014     if (!disable_skip && ref_frame != INTRA_FRAME) {
3015       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3016
3017       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3018         single_rate = rate2 - compmode_cost;
3019         hybrid_rate = rate2;
3020       } else {
3021         single_rate = rate2;
3022         hybrid_rate = rate2 + compmode_cost;
3023       }
3024
3025       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3026       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3027
3028       if (!comp_pred) {
3029         if (single_rd < best_pred_rd[SINGLE_REFERENCE])
3030           best_pred_rd[SINGLE_REFERENCE] = single_rd;
3031       } else {
3032         if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
3033           best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3034       }
3035       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3036         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3037
3038       /* keep record of best filter type */
3039       if (!mode_excluded && cm->interp_filter != BILINEAR) {
3040         int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3041                               SWITCHABLE_FILTERS : cm->interp_filter];
3042
3043         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3044           int64_t adj_rd;
3045           if (ref == INT64_MAX)
3046             adj_rd = 0;
3047           else if (rd_opt->filter_cache[i] == INT64_MAX)
3048             // when early termination is triggered, the encoder does not have
3049             // access to the rate-distortion cost. it only knows that the cost
3050             // should be above the maximum valid value. hence it takes the known
3051             // maximum plus an arbitrary constant as the rate-distortion cost.
3052             adj_rd = rd_opt->mask_filter - ref + 10;
3053           else
3054             adj_rd = rd_opt->filter_cache[i] - ref;
3055
3056           adj_rd += this_rd;
3057           best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3058         }
3059       }
3060     }
3061
3062     /* keep record of best txfm size */
3063     if (bsize < BLOCK_32X32) {
3064       if (bsize < BLOCK_16X16)
3065         tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3066
3067       tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3068     }
3069     if (!mode_excluded && this_rd != INT64_MAX) {
3070       for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3071         int64_t adj_rd = INT64_MAX;
3072         adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3073
3074         if (adj_rd < best_tx_rd[i])
3075           best_tx_rd[i] = adj_rd;
3076       }
3077     }
3078
3079     if (early_term)
3080       break;
3081
3082     if (x->skip && !comp_pred)
3083       break;
3084   }
3085
3086   // The inter modes' rate costs are not calculated precisely in some cases.
3087   // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
3088   // ZEROMV. Here, checks are added for those cases, and the mode decisions
3089   // are corrected.
3090   if (best_mbmode.mode == NEWMV) {
3091     const MV_REFERENCE_FRAME refs[2] = {best_mbmode.ref_frame[0],
3092         best_mbmode.ref_frame[1]};
3093     int comp_pred_mode = refs[1] > INTRA_FRAME;
3094
3095     if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3096         ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int ==
3097             best_mbmode.mv[1].as_int) || !comp_pred_mode))
3098       best_mbmode.mode = NEARESTMV;
3099     else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3100         ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int ==
3101             best_mbmode.mv[1].as_int) || !comp_pred_mode))
3102       best_mbmode.mode = NEARMV;
3103     else if (best_mbmode.mv[0].as_int == 0 &&
3104         ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode))
3105       best_mbmode.mode = ZEROMV;
3106   }
3107
3108   if (best_mode_index < 0 || best_rd >= best_rd_so_far)
3109     return INT64_MAX;
3110
3111   // If we used an estimate for the uv intra rd in the loop above...
3112   if (cpi->sf.use_uv_intra_rd_estimate) {
3113     // Do Intra UV best rd mode selection if best mode choice above was intra.
3114     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3115       TX_SIZE uv_tx_size;
3116       *mbmi = best_mbmode;
3117       uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
3118       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3119                               &rate_uv_tokenonly[uv_tx_size],
3120                               &dist_uv[uv_tx_size],
3121                               &skip_uv[uv_tx_size],
3122                               bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3123                               uv_tx_size);
3124     }
3125   }
3126
3127   assert((cm->interp_filter == SWITCHABLE) ||
3128          (cm->interp_filter == best_mbmode.interp_filter) ||
3129          !is_inter_block(&best_mbmode));
3130
3131   update_rd_thresh_fact(cpi, bsize, best_mode_index);
3132
3133   // macroblock modes
3134   *mbmi = best_mbmode;
3135   x->skip |= best_skip2;
3136
3137   for (i = 0; i < REFERENCE_MODES; ++i) {
3138     if (best_pred_rd[i] == INT64_MAX)
3139       best_pred_diff[i] = INT_MIN;
3140     else
3141       best_pred_diff[i] = best_rd - best_pred_rd[i];
3142   }
3143
3144   if (!x->skip) {
3145     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3146       if (best_filter_rd[i] == INT64_MAX)
3147         best_filter_diff[i] = 0;
3148       else
3149         best_filter_diff[i] = best_rd - best_filter_rd[i];
3150     }
3151     if (cm->interp_filter == SWITCHABLE)
3152       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3153     for (i = 0; i < TX_MODES; i++) {
3154       if (best_tx_rd[i] == INT64_MAX)
3155         best_tx_diff[i] = 0;
3156       else
3157         best_tx_diff[i] = best_rd - best_tx_rd[i];
3158     }
3159   } else {
3160     vp9_zero(best_filter_diff);
3161     vp9_zero(best_tx_diff);
3162   }
3163
3164   store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3165                        best_tx_diff, best_filter_diff, best_mode_skippable);
3166
3167   return best_rd;
3168 }
3169
3170 int64_t vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, MACROBLOCK *x,
3171                                            int *returnrate,
3172                                            int64_t *returndistortion,
3173                                            BLOCK_SIZE bsize,
3174                                            PICK_MODE_CONTEXT *ctx,
3175                                            int64_t best_rd_so_far) {
3176   VP9_COMMON *const cm = &cpi->common;
3177   RD_OPT *const rd_opt = &cpi->rd;
3178   MACROBLOCKD *const xd = &x->e_mbd;
3179   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3180   unsigned char segment_id = mbmi->segment_id;
3181   const int comp_pred = 0;
3182   int i;
3183   int64_t best_tx_diff[TX_MODES];
3184   int64_t best_pred_diff[REFERENCE_MODES];
3185   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3186   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3187   vp9_prob comp_mode_p;
3188   INTERP_FILTER best_filter = SWITCHABLE;
3189   int64_t this_rd = INT64_MAX;
3190   int rate2 = 0;
3191   const int64_t distortion2 = 0;
3192
3193   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3194
3195   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3196                            &comp_mode_p);
3197
3198   for (i = 0; i < MAX_REF_FRAMES; ++i)
3199     x->pred_sse[i] = INT_MAX;
3200   for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i)
3201     x->pred_mv_sad[i] = INT_MAX;
3202
3203   *returnrate = INT_MAX;
3204
3205   assert(vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));
3206
3207   mbmi->mode = ZEROMV;
3208   mbmi->uv_mode = DC_PRED;
3209   mbmi->ref_frame[0] = LAST_FRAME;
3210   mbmi->ref_frame[1] = NONE;
3211   mbmi->mv[0].as_int = 0;
3212   x->skip = 1;
3213
3214   // Search for best switchable filter by checking the variance of
3215   // pred error irrespective of whether the filter will be used
3216   rd_opt->mask_filter = 0;
3217   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3218     rd_opt->filter_cache[i] = INT64_MAX;
3219
3220   if (cm->interp_filter != BILINEAR) {
3221     best_filter = EIGHTTAP;
3222     if (cm->interp_filter == SWITCHABLE &&
3223         x->source_variance >= cpi->sf.disable_filter_search_var_thresh) {
3224       int rs;
3225       int best_rs = INT_MAX;
3226       for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
3227         mbmi->interp_filter = i;
3228         rs = vp9_get_switchable_rate(cpi);
3229         if (rs < best_rs) {
3230           best_rs = rs;
3231           best_filter = mbmi->interp_filter;
3232         }
3233       }
3234     }
3235   }
3236   // Set the appropriate filter
3237   if (cm->interp_filter == SWITCHABLE) {
3238     mbmi->interp_filter = best_filter;
3239     rate2 += vp9_get_switchable_rate(cpi);
3240   } else {
3241     mbmi->interp_filter = cm->interp_filter;
3242   }
3243
3244   if (cm->reference_mode == REFERENCE_MODE_SELECT)
3245     rate2 += vp9_cost_bit(comp_mode_p, comp_pred);
3246
3247   // Estimate the reference frame signaling cost and add it
3248   // to the rolling cost variable.
3249   rate2 += ref_costs_single[LAST_FRAME];
3250   this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3251
3252   *returnrate = rate2;
3253   *returndistortion = distortion2;
3254
3255   if (this_rd >= best_rd_so_far)
3256     return INT64_MAX;
3257
3258   assert((cm->interp_filter == SWITCHABLE) ||
3259          (cm->interp_filter == mbmi->interp_filter));
3260
3261   update_rd_thresh_fact(cpi, bsize, THR_ZEROMV);
3262
3263   vp9_zero(best_pred_diff);
3264   vp9_zero(best_filter_diff);
3265   vp9_zero(best_tx_diff);
3266
3267   if (!x->select_tx_size)
3268     swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
3269   store_coding_context(x, ctx, THR_ZEROMV,
3270                        best_pred_diff, best_tx_diff, best_filter_diff, 0);
3271
3272   return this_rd;
3273 }
3274
3275 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3276                                       const TileInfo *const tile,
3277                                       int mi_row, int mi_col,
3278                                       int *returnrate,
3279                                       int64_t *returndistortion,
3280                                       BLOCK_SIZE bsize,
3281                                       PICK_MODE_CONTEXT *ctx,
3282                                       int64_t best_rd_so_far) {
3283   VP9_COMMON *const cm = &cpi->common;
3284   RD_OPT *const rd_opt = &cpi->rd;
3285   MACROBLOCKD *const xd = &x->e_mbd;
3286   MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3287   const struct segmentation *const seg = &cm->seg;
3288   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3289   unsigned char segment_id = mbmi->segment_id;
3290   int comp_pred, i;
3291   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3292   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3293   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3294                                     VP9_ALT_FLAG };
3295   int64_t best_rd = best_rd_so_far;
3296   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
3297   static const int64_t best_tx_diff[TX_MODES] = { 0 };
3298   int64_t best_pred_diff[REFERENCE_MODES];
3299   int64_t best_pred_rd[REFERENCE_MODES];
3300   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3301   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3302   MB_MODE_INFO best_mbmode;
3303   int ref_index, best_ref_index = 0;
3304   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3305   vp9_prob comp_mode_p;
3306   INTERP_FILTER tmp_best_filter = SWITCHABLE;
3307   int rate_uv_intra, rate_uv_tokenonly;
3308   int64_t dist_uv;
3309   int skip_uv;
3310   PREDICTION_MODE mode_uv = DC_PRED;
3311   int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3312   int_mv seg_mvs[4][MAX_REF_FRAMES];
3313   b_mode_info best_bmodes[4];
3314   int best_skip2 = 0;
3315   int ref_frame_skip_mask[2] = { 0 };
3316
3317   x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3318   vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3319   vp9_zero(best_mbmode);
3320
3321   for (i = 0; i < 4; i++) {
3322     int j;
3323     for (j = 0; j < MAX_REF_FRAMES; j++)
3324       seg_mvs[i][j].as_int = INVALID_MV;
3325   }
3326
3327   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3328                            &comp_mode_p);
3329
3330   for (i = 0; i < REFERENCE_MODES; ++i)
3331     best_pred_rd[i] = INT64_MAX;
3332   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3333     best_filter_rd[i] = INT64_MAX;
3334   rate_uv_intra = INT_MAX;
3335
3336   *returnrate = INT_MAX;
3337
3338   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3339     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3340       setup_buffer_inter(cpi, x, tile,
3341                              ref_frame, bsize, mi_row, mi_col,
3342                              frame_mv[NEARESTMV], frame_mv[NEARMV],
3343                              yv12_mb);
3344     } else {
3345       ref_frame_skip_mask[0] |= (1 << ref_frame);
3346       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3347     }
3348     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3349     frame_mv[ZEROMV][ref_frame].as_int = 0;
3350   }
3351
3352   for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3353     int mode_excluded = 0;
3354     int64_t this_rd = INT64_MAX;
3355     int disable_skip = 0;
3356     int compmode_cost = 0;
3357     int rate2 = 0, rate_y = 0, rate_uv = 0;
3358     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3359     int skippable = 0;
3360     int i;
3361     int this_skip2 = 0;
3362     int64_t total_sse = INT_MAX;
3363     int early_term = 0;
3364
3365     ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3366     second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3367
3368     // Look at the reference frame of the best mode so far and set the
3369     // skip mask to look at a subset of the remaining modes.
3370     if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3371       if (ref_index == 3) {
3372         switch (vp9_ref_order[best_ref_index].ref_frame[0]) {
3373           case INTRA_FRAME:
3374             break;
3375           case LAST_FRAME:
3376             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
3377             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3378             break;
3379           case GOLDEN_FRAME:
3380             ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
3381             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3382             break;
3383           case ALTREF_FRAME:
3384             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
3385             break;
3386           case NONE:
3387           case MAX_REF_FRAMES:
3388             assert(0 && "Invalid Reference frame");
3389             break;
3390         }
3391       }
3392     }
3393
3394     if (ref_frame_skip_mask[0] & (1 << ref_index) &&
3395         ref_frame_skip_mask[1] & (1 << ref_index))
3396       continue;
3397
3398     // Test best rd so far against threshold for trying this mode.
3399     if (rd_less_than_thresh(best_rd,
3400                             rd_opt->threshes[segment_id][bsize][ref_index],
3401                             rd_opt->thresh_freq_fact[bsize][ref_index]))
3402       continue;
3403
3404     if (ref_frame > INTRA_FRAME &&
3405         !(cpi->ref_frame_flags & flag_list[ref_frame])) {
3406       continue;
3407     }
3408
3409     comp_pred = second_ref_frame > INTRA_FRAME;
3410     if (comp_pred) {
3411       if (!cm->allow_comp_inter_inter)
3412         continue;
3413
3414       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3415         continue;
3416       // Do not allow compound prediction if the segment level reference frame
3417       // feature is in use as in this case there can only be one reference.
3418       if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3419         continue;
3420       if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3421           vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME)
3422         continue;
3423     }
3424
3425     // TODO(jingning, jkoleszar): scaling reference frame not supported for
3426     // sub8x8 blocks.
3427     if (ref_frame > INTRA_FRAME &&
3428         vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3429       continue;
3430
3431     if (second_ref_frame > INTRA_FRAME &&
3432         vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3433       continue;
3434
3435     if (comp_pred)
3436       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3437     else if (ref_frame != INTRA_FRAME)
3438       mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3439
3440     // If the segment reference frame feature is enabled....
3441     // then do nothing if the current ref frame is not allowed..
3442     if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3443         vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3444       continue;
3445     // Disable this drop out case if the ref frame
3446     // segment level feature is enabled for this segment. This is to
3447     // prevent the possibility that we end up unable to pick any mode.
3448     } else if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3449       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3450       // unless ARNR filtering is enabled in which case we want
3451       // an unfiltered alternative. We allow near/nearest as well
3452       // because they may result in zero-zero MVs but be cheaper.
3453       if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3454         continue;
3455     }
3456
3457     mbmi->tx_size = TX_4X4;
3458     mbmi->uv_mode = DC_PRED;
3459     mbmi->ref_frame[0] = ref_frame;
3460     mbmi->ref_frame[1] = second_ref_frame;
3461     // Evaluate all sub-pel filters irrespective of whether we can use
3462     // them for this frame.
3463     mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3464                                                           : cm->interp_filter;
3465     x->skip = 0;
3466     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3467
3468     // Select prediction reference frames.
3469     for (i = 0; i < MAX_MB_PLANE; i++) {
3470       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3471       if (comp_pred)
3472         xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3473     }
3474
3475     if (ref_frame == INTRA_FRAME) {
3476       int rate;
3477       if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3478                                        &distortion_y, best_rd) >= best_rd)
3479         continue;
3480       rate2 += rate;
3481       rate2 += intra_cost_penalty;
3482       distortion2 += distortion_y;
3483
3484       if (rate_uv_intra == INT_MAX) {
3485         choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3486                              &rate_uv_intra,
3487                              &rate_uv_tokenonly,
3488                              &dist_uv, &skip_uv,
3489                              &mode_uv);
3490       }
3491       rate2 += rate_uv_intra;
3492       rate_uv = rate_uv_tokenonly;
3493       distortion2 += dist_uv;
3494       distortion_uv = dist_uv;
3495       mbmi->uv_mode = mode_uv;
3496     } else {
3497       int rate;
3498       int64_t distortion;
3499       int64_t this_rd_thresh;
3500       int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3501       int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3502       int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3503       int tmp_best_skippable = 0;
3504       int switchable_filter_index;
3505       int_mv *second_ref = comp_pred ?
3506                              &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3507       b_mode_info tmp_best_bmodes[16];
3508       MB_MODE_INFO tmp_best_mbmode;
3509       BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3510       int pred_exists = 0;
3511       int uv_skippable;
3512
3513       this_rd_thresh = (ref_frame == LAST_FRAME) ?
3514           rd_opt->threshes[segment_id][bsize][THR_LAST] :
3515           rd_opt->threshes[segment_id][bsize][THR_ALTR];
3516       this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3517       rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
3518       rd_opt->mask_filter = 0;
3519       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3520         rd_opt->filter_cache[i] = INT64_MAX;
3521
3522       if (cm->interp_filter != BILINEAR) {
3523         tmp_best_filter = EIGHTTAP;
3524         if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
3525           tmp_best_filter = EIGHTTAP;
3526         } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
3527                    ctx->pred_interp_filter < SWITCHABLE) {
3528           tmp_best_filter = ctx->pred_interp_filter;
3529         } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
3530           tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
3531                               ctx->pred_interp_filter : 0;
3532         } else {
3533           for (switchable_filter_index = 0;
3534                switchable_filter_index < SWITCHABLE_FILTERS;
3535                ++switchable_filter_index) {
3536             int newbest, rs;
3537             int64_t rs_rd;
3538             mbmi->interp_filter = switchable_filter_index;
3539             tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3540                                               &mbmi->ref_mvs[ref_frame][0],
3541                                               second_ref, best_yrd, &rate,
3542                                               &rate_y, &distortion,
3543                                               &skippable, &total_sse,
3544                                               (int) this_rd_thresh, seg_mvs,
3545                                               bsi, switchable_filter_index,
3546                                               mi_row, mi_col);
3547
3548             if (tmp_rd == INT64_MAX)
3549               continue;
3550             rs = vp9_get_switchable_rate(cpi);
3551             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3552             rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
3553             rd_opt->filter_cache[SWITCHABLE_FILTERS] =
3554                 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
3555                     tmp_rd + rs_rd);
3556             if (cm->interp_filter == SWITCHABLE)
3557               tmp_rd += rs_rd;
3558
3559             rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
3560
3561             newbest = (tmp_rd < tmp_best_rd);
3562             if (newbest) {
3563               tmp_best_filter = mbmi->interp_filter;
3564               tmp_best_rd = tmp_rd;
3565             }
3566             if ((newbest && cm->interp_filter == SWITCHABLE) ||
3567                 (mbmi->interp_filter == cm->interp_filter &&
3568                  cm->interp_filter != SWITCHABLE)) {
3569               tmp_best_rdu = tmp_rd;
3570               tmp_best_rate = rate;
3571               tmp_best_ratey = rate_y;
3572               tmp_best_distortion = distortion;
3573               tmp_best_sse = total_sse;
3574               tmp_best_skippable = skippable;
3575               tmp_best_mbmode = *mbmi;
3576               for (i = 0; i < 4; i++) {
3577                 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
3578                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
3579               }
3580               pred_exists = 1;
3581               if (switchable_filter_index == 0 &&
3582                   cpi->sf.use_rd_breakout &&
3583                   best_rd < INT64_MAX) {
3584                 if (tmp_best_rdu / 2 > best_rd) {
3585                   // skip searching the other filters if the first is
3586                   // already substantially larger than the best so far
3587                   tmp_best_filter = mbmi->interp_filter;
3588                   tmp_best_rdu = INT64_MAX;
3589                   break;
3590                 }
3591               }
3592             }
3593           }  // switchable_filter_index loop
3594         }
3595       }
3596
3597       if (tmp_best_rdu == INT64_MAX && pred_exists)
3598         continue;
3599
3600       mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
3601                              tmp_best_filter : cm->interp_filter);
3602       if (!pred_exists) {
3603         // Handles the special case when a filter that is not in the
3604         // switchable list (bilinear, 6-tap) is indicated at the frame level
3605         tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3606                                           &mbmi->ref_mvs[ref_frame][0],
3607                                           second_ref, best_yrd, &rate, &rate_y,
3608                                           &distortion, &skippable, &total_sse,
3609                                           (int) this_rd_thresh, seg_mvs, bsi, 0,
3610                                           mi_row, mi_col);
3611         if (tmp_rd == INT64_MAX)
3612           continue;
3613       } else {
3614         total_sse = tmp_best_sse;
3615         rate = tmp_best_rate;
3616         rate_y = tmp_best_ratey;
3617         distortion = tmp_best_distortion;
3618         skippable = tmp_best_skippable;
3619         *mbmi = tmp_best_mbmode;
3620         for (i = 0; i < 4; i++)
3621           xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
3622       }
3623
3624       rate2 += rate;
3625       distortion2 += distortion;
3626
3627       if (cm->interp_filter == SWITCHABLE)
3628         rate2 += vp9_get_switchable_rate(cpi);
3629
3630       if (!mode_excluded)
3631         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
3632                                   : cm->reference_mode == COMPOUND_REFERENCE;
3633
3634       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3635
3636       tmp_best_rdu = best_rd -
3637           MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
3638               RDCOST(x->rdmult, x->rddiv, 0, total_sse));
3639
3640       if (tmp_best_rdu > 0) {
3641         // If even the 'Y' rd value of split is higher than best so far
3642         // then dont bother looking at UV
3643         vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
3644                                         BLOCK_8X8);
3645         super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
3646                          &uv_sse, BLOCK_8X8, tmp_best_rdu);
3647         if (rate_uv == INT_MAX)
3648           continue;
3649         rate2 += rate_uv;
3650         distortion2 += distortion_uv;
3651         skippable = skippable && uv_skippable;
3652         total_sse += uv_sse;
3653       }
3654     }
3655
3656     if (cm->reference_mode == REFERENCE_MODE_SELECT)
3657       rate2 += compmode_cost;
3658
3659     // Estimate the reference frame signaling cost and add it
3660     // to the rolling cost variable.
3661     if (second_ref_frame > INTRA_FRAME) {
3662       rate2 += ref_costs_comp[ref_frame];
3663     } else {
3664       rate2 += ref_costs_single[ref_frame];
3665     }
3666
3667     if (!disable_skip) {
3668       // Skip is never coded at the segment level for sub8x8 blocks and instead
3669       // always coded in the bitstream at the mode info level.
3670
3671       if (ref_frame != INTRA_FRAME && !xd->lossless) {
3672         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3673             RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3674           // Add in the cost of the no skip flag.
3675           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3676         } else {
3677           // FIXME(rbultje) make this work for splitmv also
3678           rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3679           distortion2 = total_sse;
3680           assert(total_sse >= 0);
3681           rate2 -= (rate_y + rate_uv);
3682           rate_y = 0;
3683           rate_uv = 0;
3684           this_skip2 = 1;
3685         }
3686       } else {
3687         // Add in the cost of the no skip flag.
3688         rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3689       }
3690
3691       // Calculate the final RD estimate for this mode.
3692       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3693     }
3694
3695     if (!disable_skip && ref_frame == INTRA_FRAME) {
3696       for (i = 0; i < REFERENCE_MODES; ++i)
3697         best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3698       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3699         best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3700     }
3701
3702     // Did this mode help.. i.e. is it the new best mode
3703     if (this_rd < best_rd || x->skip) {
3704       if (!mode_excluded) {
3705         int max_plane = MAX_MB_PLANE;
3706         // Note index of best mode so far
3707         best_ref_index = ref_index;
3708
3709         if (ref_frame == INTRA_FRAME) {
3710           /* required for left and above block mv */
3711           mbmi->mv[0].as_int = 0;
3712           max_plane = 1;
3713         }
3714
3715         *returnrate = rate2;
3716         *returndistortion = distortion2;
3717         best_rd = this_rd;
3718         best_yrd = best_rd -
3719                    RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
3720         best_mbmode = *mbmi;
3721         best_skip2 = this_skip2;
3722         if (!x->select_tx_size)
3723           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3724         vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
3725                    sizeof(uint8_t) * ctx->num_4x4_blk);
3726
3727         for (i = 0; i < 4; i++)
3728           best_bmodes[i] = xd->mi[0]->bmi[i];
3729
3730         // TODO(debargha): enhance this test with a better distortion prediction
3731         // based on qp, activity mask and history
3732         if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3733             (ref_index > MIN_EARLY_TERM_INDEX)) {
3734           const int qstep = xd->plane[0].dequant[1];
3735           // TODO(debargha): Enhance this by specializing for each mode_index
3736           int scale = 4;
3737           if (x->source_variance < UINT_MAX) {
3738             const int var_adjust = (x->source_variance < 16);
3739             scale -= var_adjust;
3740           }
3741           if (ref_frame > INTRA_FRAME &&
3742               distortion2 * scale < qstep * qstep) {
3743             early_term = 1;
3744           }
3745         }
3746       }
3747     }
3748
3749     /* keep record of best compound/single-only prediction */
3750     if (!disable_skip && ref_frame != INTRA_FRAME) {
3751       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3752
3753       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3754         single_rate = rate2 - compmode_cost;
3755         hybrid_rate = rate2;
3756       } else {
3757         single_rate = rate2;
3758         hybrid_rate = rate2 + compmode_cost;
3759       }
3760
3761       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3762       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3763
3764       if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
3765         best_pred_rd[SINGLE_REFERENCE] = single_rd;
3766       else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
3767         best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3768
3769       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3770         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3771     }
3772
3773     /* keep record of best filter type */
3774     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3775         cm->interp_filter != BILINEAR) {
3776       int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3777                               SWITCHABLE_FILTERS : cm->interp_filter];
3778       int64_t adj_rd;
3779       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3780         if (ref == INT64_MAX)
3781           adj_rd = 0;
3782         else if (rd_opt->filter_cache[i] == INT64_MAX)
3783           // when early termination is triggered, the encoder does not have
3784           // access to the rate-distortion cost. it only knows that the cost
3785           // should be above the maximum valid value. hence it takes the known
3786           // maximum plus an arbitrary constant as the rate-distortion cost.
3787           adj_rd = rd_opt->mask_filter - ref + 10;
3788         else
3789           adj_rd = rd_opt->filter_cache[i] - ref;
3790
3791         adj_rd += this_rd;
3792         best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3793       }
3794     }
3795
3796     if (early_term)
3797       break;
3798
3799     if (x->skip && !comp_pred)
3800       break;
3801   }
3802
3803   if (best_rd >= best_rd_so_far)
3804     return INT64_MAX;
3805
3806   // If we used an estimate for the uv intra rd in the loop above...
3807   if (cpi->sf.use_uv_intra_rd_estimate) {
3808     // Do Intra UV best rd mode selection if best mode choice above was intra.
3809     if (vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME) {
3810       *mbmi = best_mbmode;
3811       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
3812                               &rate_uv_tokenonly,
3813                               &dist_uv,
3814                               &skip_uv,
3815                               BLOCK_8X8, TX_4X4);
3816     }
3817   }
3818
3819   if (best_rd == INT64_MAX) {
3820     *returnrate = INT_MAX;
3821     *returndistortion = INT64_MAX;
3822     return best_rd;
3823   }
3824
3825   assert((cm->interp_filter == SWITCHABLE) ||
3826          (cm->interp_filter == best_mbmode.interp_filter) ||
3827          !is_inter_block(&best_mbmode));
3828
3829   update_rd_thresh_fact(cpi, bsize, best_ref_index);
3830
3831   // macroblock modes
3832   *mbmi = best_mbmode;
3833   x->skip |= best_skip2;
3834   if (!is_inter_block(&best_mbmode)) {
3835     for (i = 0; i < 4; i++)
3836       xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
3837   } else {
3838     for (i = 0; i < 4; ++i)
3839       vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
3840
3841     mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
3842     mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
3843   }
3844
3845   for (i = 0; i < REFERENCE_MODES; ++i) {
3846     if (best_pred_rd[i] == INT64_MAX)
3847       best_pred_diff[i] = INT_MIN;
3848     else
3849       best_pred_diff[i] = best_rd - best_pred_rd[i];
3850   }
3851
3852   if (!x->skip) {
3853     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3854       if (best_filter_rd[i] == INT64_MAX)
3855         best_filter_diff[i] = 0;
3856       else
3857         best_filter_diff[i] = best_rd - best_filter_rd[i];
3858     }
3859     if (cm->interp_filter == SWITCHABLE)
3860       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3861   } else {
3862     vp9_zero(best_filter_diff);
3863   }
3864
3865   store_coding_context(x, ctx, best_ref_index,
3866                        best_pred_diff, best_tx_diff, best_filter_diff, 0);
3867
3868   return best_rd;
3869 }