2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
16 #include "vp9/common/vp9_pragmas.h"
17 #include "vp9/encoder/vp9_tokenize.h"
18 #include "vp9/encoder/vp9_treewriter.h"
19 #include "vp9/encoder/vp9_onyx_int.h"
20 #include "vp9/common/vp9_entropymode.h"
21 #include "vp9/common/vp9_reconinter.h"
22 #include "vp9/common/vp9_reconintra.h"
23 #include "vp9/common/vp9_quant_common.h"
24 #include "vp9/encoder/vp9_encodemb.h"
25 #include "vp9/encoder/vp9_quantize.h"
26 #include "vp9/encoder/vp9_variance.h"
27 #include "vp9/encoder/vp9_mcomp.h"
28 #include "vp9/encoder/vp9_rdopt.h"
29 #include "vp9/encoder/vp9_ratectrl.h"
30 #include "vpx_mem/vpx_mem.h"
31 #include "vp9/common/vp9_systemdependent.h"
32 #include "vp9/encoder/vp9_encodemv.h"
33 #include "vp9/common/vp9_seg_common.h"
34 #include "vp9/common/vp9_pred_common.h"
35 #include "vp9/common/vp9_entropy.h"
36 #include "./vp9_rtcd.h"
37 #include "vp9/common/vp9_mvref_common.h"
38 #include "vp9/common/vp9_common.h"
// NOTE(review): presumably a sentinel motion-vector bit pattern (0x8000 in
// both 16-bit halves) — confirm against the code that tests for it.
40 #define INVALID_MV 0x80008000
42 /* Factor to weigh the rate for switchable interp filters */
43 #define SWITCHABLE_INTERP_RATE_FACTOR 1
// One 32-bit mask per reference frame.
// NOTE(review): presumably bit i corresponds to entry i of vp9_mode_order —
// confirm at the use sites, which are not visible here.
45 #define LAST_FRAME_MODE_MASK 0xFFEDCD60
46 #define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
47 #define ALT_REF_MODE_MASK 0xFFC648D0
// NOTE(review): presumably the lowest mode index at which early termination
// of the mode search is allowed — confirm at the use site.
49 #define MIN_EARLY_TERM_INDEX 3
// Table of MAX_MODES candidate modes. Each entry is a triple
// {prediction mode, first reference frame, second reference frame}.
// Entries whose third field is NONE use a single reference (or INTRA_FRAME
// for the intra prediction modes); entries whose third field is ALTREF_FRAME
// name two reference frames.
// NOTE(review): presumably the position of an entry is the mode index used by
// the mode search and by the *_MODE_MASK macros — confirm at the use sites.
51 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
// Single-reference NEARESTMV on each reference frame.
52 {NEARESTMV, LAST_FRAME, NONE},
53 {NEARESTMV, ALTREF_FRAME, NONE},
54 {NEARESTMV, GOLDEN_FRAME, NONE},
56 {DC_PRED, INTRA_FRAME, NONE},
// Single-reference NEWMV on each reference frame.
58 {NEWMV, LAST_FRAME, NONE},
59 {NEWMV, ALTREF_FRAME, NONE},
60 {NEWMV, GOLDEN_FRAME, NONE},
62 {NEARMV, LAST_FRAME, NONE},
63 {NEARMV, ALTREF_FRAME, NONE},
// Two-reference NEARESTMV entries.
64 {NEARESTMV, LAST_FRAME, ALTREF_FRAME},
65 {NEARESTMV, GOLDEN_FRAME, ALTREF_FRAME},
67 {TM_PRED, INTRA_FRAME, NONE},
69 {NEARMV, LAST_FRAME, ALTREF_FRAME},
70 {NEWMV, LAST_FRAME, ALTREF_FRAME},
71 {NEARMV, GOLDEN_FRAME, NONE},
72 {NEARMV, GOLDEN_FRAME, ALTREF_FRAME},
73 {NEWMV, GOLDEN_FRAME, ALTREF_FRAME},
// ZEROMV entries, single-reference first, then two-reference.
75 {ZEROMV, LAST_FRAME, NONE},
76 {ZEROMV, GOLDEN_FRAME, NONE},
77 {ZEROMV, ALTREF_FRAME, NONE},
78 {ZEROMV, LAST_FRAME, ALTREF_FRAME},
79 {ZEROMV, GOLDEN_FRAME, ALTREF_FRAME},
// Remaining intra prediction modes.
81 {H_PRED, INTRA_FRAME, NONE},
82 {V_PRED, INTRA_FRAME, NONE},
83 {D135_PRED, INTRA_FRAME, NONE},
84 {D207_PRED, INTRA_FRAME, NONE},
85 {D153_PRED, INTRA_FRAME, NONE},
86 {D63_PRED, INTRA_FRAME, NONE},
87 {D117_PRED, INTRA_FRAME, NONE},
88 {D45_PRED, INTRA_FRAME, NONE},
// Table of MAX_REFS reference-frame combinations, each a pair
// {first reference frame, second reference frame}.
// NOTE(review): only the two two-reference initializers are visible in this
// listing; the remaining entries and the closing brace are not shown.
91 const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
95 {LAST_FRAME, ALTREF_FRAME},
96 {GOLDEN_FRAME, ALTREF_FRAME},
100 // The baseline rd thresholds for breaking out of the rd loop for
101 // certain modes are assumed to be based on 8x8 blocks.
102 // This table is used to correct for block size.
103 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// One entry per block size (BLOCK_SIZES entries). set_block_thresholds()
// multiplies by the entry and then divides by 4 to undo the << 2 scaling.
104 static int rd_thresh_block_size_factor[BLOCK_SIZES] =
105 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
// NOTE(review): RD_THRESH_MAX_FACT and RD_THRESH_INC are not referenced in the
// visible part of the file; presumably they bound and step an adaptive
// threshold factor — confirm at the use sites.
107 #define RD_THRESH_MAX_FACT 64
108 #define RD_THRESH_INC 1
// Exponent applied to (dc quantizer / 4) in compute_rd_thresh_factor().
109 #define RD_THRESH_POW 1.25
// vp9_initialize_rd_consts() sets errorperbit = RDMULT / RD_MULT_EPB_RATIO.
110 #define RD_MULT_EPB_RATIO 64
// NOTE(review): presumably weights applied to motion-vector cost; the use
// sites are not visible here — confirm.
112 #define MV_COST_WEIGHT 108
113 #define MV_COST_WEIGHT_SUB 120
// Returns the sample offset of block number `raster_block` (raster order)
// inside a plane block of size `plane_bsize` whose rows are `stride` samples
// apart. The index is split into a row (index >> bw) and a column
// (index & ((1 << bw) - 1)), with bw taken from b_width_log2(); both are
// multiplied by 4 before being combined as y * stride + x.
115 static int raster_block_offset(BLOCK_SIZE plane_bsize,
116 int raster_block, int stride) {
117 const int bw = b_width_log2(plane_bsize);
118 const int y = 4 * (raster_block >> bw);
119 const int x = 4 * (raster_block & ((1 << bw) - 1));
120 return y * stride + x;
// Returns a pointer into the int16_t buffer `base` at the position of block
// `raster_block`. The buffer is treated as tightly packed: its stride is
// 4 * num_4x4_blocks_wide_lookup[plane_bsize] elements.
122 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
123 int raster_block, int16_t *base) {
124 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
125 return base + raster_block_offset(plane_bsize, raster_block, stride);
// Fills the mode cost tables in c->mb by converting probability tables into
// token costs with vp9_cost_tokens().
// NOTE(review): the declarations of the loop counters i and j are not visible
// in this listing.
128 static void fill_mode_costs(VP9_COMP *c) {
129 VP9_COMMON *const cm = &c->common;
// Luma intra mode costs for every (i, j) pair of the key-frame probability
// table vp9_kf_y_mode_prob.
132 for (i = 0; i < INTRA_MODES; i++)
133 for (j = 0; j < INTRA_MODES; j++)
134 vp9_cost_tokens((int *)c->mb.y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
135 vp9_intra_mode_tree);
137 // TODO(rbultje) separate tables for superblock costing?
// Luma mode costs from the frame context; only y_mode_prob[1] is used.
138 vp9_cost_tokens(c->mb.mbmode_cost, cm->fc.y_mode_prob[1],
139 vp9_intra_mode_tree);
// Chroma intra mode costs: index 1 is built from the frame-context
// probabilities, index 0 from the key-frame table. Both use only the
// probability row at index INTRA_MODES - 1.
140 vp9_cost_tokens(c->mb.intra_uv_mode_cost[1],
141 cm->fc.uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
142 vp9_cost_tokens(c->mb.intra_uv_mode_cost[0],
143 vp9_kf_uv_mode_prob[INTRA_MODES - 1],
144 vp9_intra_mode_tree);
// Switchable interpolation filter costs, one table per filter context.
146 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
147 vp9_cost_tokens((int *)c->mb.switchable_interp_costs[i],
148 cm->fc.switchable_interp_prob[i],
149 vp9_switchable_interp_tree);
// Builds the coefficient token cost tables `c` from the model probabilities
// `p`. The tables are indexed by transform size t, plane type i, reference
// type j, coefficient band k and context l.
// NOTE(review): the declarations of i, j, k, l, t and the final (tree)
// argument of the two cost calls are not visible in this listing.
152 static void fill_token_costs(vp9_coeff_cost *c,
153 vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
156 for (t = TX_4X4; t <= TX_32X32; ++t)
157 for (i = 0; i < PLANE_TYPES; ++i)
158 for (j = 0; j < REF_TYPES; ++j)
159 for (k = 0; k < COEF_BANDS; ++k)
160 for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
// Expand the stored model probabilities into a full per-node set.
161 vp9_prob probs[ENTROPY_NODES];
162 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
// Sub-table [0] is filled by vp9_cost_tokens(), sub-table [1] by
// vp9_cost_tokens_skip().
163 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
165 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
// Both sub-tables must agree on the cost of EOB_TOKEN.
167 assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
168 c[t][i][j][k][1][l][EOB_TOKEN]);
// Boost table used by vp9_compute_rd_mult(): the entry selected by
// twopass.next_iiratio (clamped to index 31) is multiplied by rdmult and the
// product, shifted right by 4, is added to rdmult. Only the first five
// entries are non-zero.
172 static const int rd_iifactor[32] = {
173 4, 4, 3, 2, 1, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0,
179 // 3* dc_qlookup[Q]*dc_qlookup[Q];
181 /* values are now correlated to quantizer */
// SAD-per-bit lookup tables indexed by qindex. They are filled by
// vp9_init_me_luts() and read by vp9_initialize_me_consts().
182 static int sad_per_bit16lut[QINDEX_RANGE];
183 static int sad_per_bit4lut[QINDEX_RANGE];
// Fills sad_per_bit16lut[] and sad_per_bit4lut[] for every qindex in
// [0, QINDEX_RANGE). Each entry is a linear function of
// vp9_convert_qindex_to_q(i), truncated to int.
// NOTE(review): the declaration of i is not visible in this listing.
185 void vp9_init_me_luts() {
188 // Initialize the sad lut tables using a formulaic calculation for now
189 // This is to make it easier to resolve the impact of experimental changes
190 // to the quantizer tables.
191 for (i = 0; i < QINDEX_RANGE; i++) {
192 sad_per_bit16lut[i] =
193 (int)((0.0418 * vp9_convert_qindex_to_q(i)) + 2.4107);
194 sad_per_bit4lut[i] = (int)(0.063 * vp9_convert_qindex_to_q(i) + 2.742);
// Computes the rate-distortion multiplier for `qindex`.
// The base value is 88 * q * q / 25, where q = vp9_dc_quant(qindex, 0).
// In the second pass (cpi->pass == 2) on non-key frames the value is boosted
// by rdmult * rd_iifactor[idx] >> 4, with idx = twopass.next_iiratio clamped
// to 31.
// NOTE(review): the `else` between the two boost statements and the return
// statement are not visible in this listing; presumably rdmult is returned.
198 int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
199 const int q = vp9_dc_quant(qindex, 0);
200 // TODO(debargha): Adjust the function below
201 int rdmult = 88 * q * q / 25;
202 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
// Ratios above the table size use the last entry.
203 if (cpi->twopass.next_iiratio > 31)
204 rdmult += (rdmult * rd_iifactor[31]) >> 4;
206 rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
// Derives the RD threshold factor for `qindex`:
// q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12).
// NOTE(review): the declaration of q, any lower bound applied to it and the
// return statement are not visible in this listing.
211 static int compute_rd_thresh_factor(int qindex) {
213 // TODO(debargha): Adjust the function below
214 q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
// Copies the SAD-per-bit constants for `qindex` from the lookup tables into
// cpi->mb. qindex is used directly as the table index, so it must lie in
// [0, QINDEX_RANGE).
220 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
221 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
222 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
// Fills cpi->rd_threshes[segment][bsize][mode] and
// cpi->rd_thresh_sub8x8[segment][bsize][ref] for every segment and block size.
// Each threshold is thresh_mult * q * block_size_factor / 4, where q comes
// from compute_rd_thresh_factor() for the segment's clamped qindex. When the
// multiplier is at or above INT_MAX / (q * block_size_factor), the threshold
// is set to INT_MAX instead, so the product cannot overflow.
// NOTE(review): the declaration of q and the `else` lines are not visible in
// this listing.
225 static void set_block_thresholds(VP9_COMP *cpi) {
226 int i, bsize, segment_id;
227 VP9_COMMON *cm = &cpi->common;
229 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
// Per-segment qindex, offset by the luma DC delta and clamped to [0, MAXQ].
231 int segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
232 segment_qindex = clamp(segment_qindex + cm->y_dc_delta_q, 0, MAXQ);
233 q = compute_rd_thresh_factor(segment_qindex);
235 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
236 // The threshold here seems unnecessarily harsh but is fine given the
237 // actual range of values used for cpi->sf.thresh_mult[]
238 int thresh_max = INT_MAX / (q * rd_thresh_block_size_factor[bsize]);
// Per-mode thresholds.
240 for (i = 0; i < MAX_MODES; ++i) {
241 if (cpi->sf.thresh_mult[i] < thresh_max) {
242 cpi->rd_threshes[segment_id][bsize][i] =
243 cpi->sf.thresh_mult[i] * q *
244 rd_thresh_block_size_factor[bsize] / 4;
246 cpi->rd_threshes[segment_id][bsize][i] = INT_MAX;
// Per-reference thresholds for sub-8x8 blocks.
250 for (i = 0; i < MAX_REFS; ++i) {
251 if (cpi->sf.thresh_mult_sub8x8[i] < thresh_max) {
252 cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
253 cpi->sf.thresh_mult_sub8x8[i] * q *
254 rd_thresh_block_size_factor[bsize] / 4;
256 cpi->rd_thresh_sub8x8[segment_id][bsize][i] = INT_MAX;
// Initializes the per-frame rate-distortion constants and cost tables in cpi:
// RDDIV/RDMULT, errorperbit, speed features, block thresholds, token costs,
// partition costs, mode costs and, for non-intra-only frames, motion vector
// and inter mode costs.
// NOTE(review): the declarations of i and qindex and a few argument lines are
// not visible in this listing.
263 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
264 VP9_COMMON *cm = &cpi->common;
267 vp9_clear_system_state(); // __asm emms;
269 // Further tests required to see if optimum is different
270 // for key frames, golden frames and arf frames.
271 // if (cpi->common.refresh_golden_frame ||
272 // cpi->common.refresh_alt_ref_frame)
// Frame-level qindex, offset by the luma DC delta and clamped to [0, MAXQ].
273 qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);
275 cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
276 cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);
// errorperbit is RDMULT scaled down, forced to be at least 1.
278 cpi->mb.errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
279 cpi->mb.errorperbit += (cpi->mb.errorperbit == 0);
281 vp9_set_speed_features(cpi);
// select_txfm_size depends on whether the search method is USE_LARGESTALL
// on a non-key frame. NOTE(review): the two result values of this
// conditional are not visible in this listing.
283 cpi->mb.select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
284 cm->frame_type != KEY_FRAME) ?
287 set_block_thresholds(cpi);
289 fill_token_costs(cpi->mb.token_costs, cm->fc.coef_probs);
// One partition cost table per partition context.
291 for (i = 0; i < PARTITION_CONTEXTS; i++)
292 vp9_cost_tokens(cpi->mb.partition_cost[i], get_partition_probs(cm, i),
295 /*rough estimate for costing*/
296 fill_mode_costs(cpi);
// Motion vector and inter mode costs are only built when the frame is not
// intra-only.
298 if (!frame_is_intra_only(cm)) {
299 vp9_build_nmv_cost_table(
300 cpi->mb.nmvjointcost,
301 cm->allow_high_precision_mv ? cpi->mb.nmvcost_hp : cpi->mb.nmvcost,
303 cm->allow_high_precision_mv, 1, 1);
305 for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
306 vp9_cost_tokens((int *)cpi->mb.inter_mode_cost[i],
307 cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
// Looks up two tables of `ntab` entries at the same fractional position and
// writes the results to *v1 and *v2.
// The position is y = x * inv_step, so consecutive table entries are
// 1 / inv_step apart in x. Inside the table the result is the linear blend
// tab[d] * (1 - a) + tab[d + 1] * a; the last entry tab[ntab - 1] is used for
// the saturated case.
// NOTE(review): the lines deriving d and a from y and the condition selecting
// the saturated case are not visible in this listing. Presumably d is the
// integer part of y, a its fractional part, and saturation applies when
// d >= ntab - 1 — confirm.
311 static INLINE void linear_interpolate2(double x, int ntab, int inv_step,
312 const double *tab1, const double *tab2,
313 double *v1, double *v2) {
314 double y = x * inv_step;
317 *v1 = tab1[ntab - 1];
318 *v2 = tab2[ntab - 1];
321 *v1 = tab1[d] * (1 - a) + tab1[d + 1] * a;
322 *v2 = tab2[d] * (1 - a) + tab2[d + 1] * a;
// Evaluates the normalized rate (*R) and normalized distortion (*D) model at
// x by linear interpolation in two precomputed tables. Each table holds
// tab_size (120) samples spaced 1 / inv_tab_step (1/8) apart in x, starting
// at x = 0.
326 static void model_rd_norm(double x, double *R, double *D) {
327 static const int inv_tab_step = 8;
328 static const int tab_size = 120;
329 // NOTE: The tables below must be of the same size
332 // This table models the rate for a Laplacian source
333 // with given variance when quantized with a uniform quantizer
334 // with given stepsize. The closed form expression is:
335 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
336 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
337 // and H(x) is the binary entropy function.
338 static const double rate_tab[] = {
339 64.00, 4.944, 3.949, 3.372, 2.966, 2.655, 2.403, 2.194,
340 2.014, 1.858, 1.720, 1.596, 1.485, 1.384, 1.291, 1.206,
341 1.127, 1.054, 0.986, 0.923, 0.863, 0.808, 0.756, 0.708,
342 0.662, 0.619, 0.579, 0.541, 0.506, 0.473, 0.442, 0.412,
343 0.385, 0.359, 0.335, 0.313, 0.291, 0.272, 0.253, 0.236,
344 0.220, 0.204, 0.190, 0.177, 0.165, 0.153, 0.142, 0.132,
345 0.123, 0.114, 0.106, 0.099, 0.091, 0.085, 0.079, 0.073,
346 0.068, 0.063, 0.058, 0.054, 0.050, 0.047, 0.043, 0.040,
347 0.037, 0.034, 0.032, 0.029, 0.027, 0.025, 0.023, 0.022,
348 0.020, 0.019, 0.017, 0.016, 0.015, 0.014, 0.013, 0.012,
349 0.011, 0.010, 0.009, 0.008, 0.008, 0.007, 0.007, 0.006,
350 0.006, 0.005, 0.005, 0.005, 0.004, 0.004, 0.004, 0.003,
351 0.003, 0.003, 0.003, 0.002, 0.002, 0.002, 0.002, 0.002,
352 0.002, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001,
353 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.000,
355 // Normalized distortion
356 // This table models the normalized distortion for a Laplacian source
357 // with given variance when quantized with a uniform quantizer
358 // with given stepsize. The closed form expression is:
359 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
360 // where x = qpstep / sqrt(variance)
361 // Note the actual distortion is Dn * variance.
362 static const double dist_tab[] = {
363 0.000, 0.001, 0.005, 0.012, 0.021, 0.032, 0.045, 0.061,
364 0.079, 0.098, 0.119, 0.142, 0.166, 0.190, 0.216, 0.242,
365 0.269, 0.296, 0.324, 0.351, 0.378, 0.405, 0.432, 0.458,
366 0.484, 0.509, 0.534, 0.557, 0.580, 0.603, 0.624, 0.645,
367 0.664, 0.683, 0.702, 0.719, 0.735, 0.751, 0.766, 0.780,
368 0.794, 0.807, 0.819, 0.830, 0.841, 0.851, 0.861, 0.870,
369 0.878, 0.886, 0.894, 0.901, 0.907, 0.913, 0.919, 0.925,
370 0.930, 0.935, 0.939, 0.943, 0.947, 0.951, 0.954, 0.957,
371 0.960, 0.963, 0.966, 0.968, 0.971, 0.973, 0.975, 0.976,
372 0.978, 0.980, 0.981, 0.982, 0.984, 0.985, 0.986, 0.987,
373 0.988, 0.989, 0.990, 0.990, 0.991, 0.992, 0.992, 0.993,
374 0.993, 0.994, 0.994, 0.995, 0.995, 0.996, 0.996, 0.996,
375 0.996, 0.997, 0.997, 0.997, 0.997, 0.998, 0.998, 0.998,
376 0.998, 0.998, 0.998, 0.999, 0.999, 0.999, 0.999, 0.999,
377 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 1.000,
// NOTE(review): the first two asserts below are missing a closing
// parenthesis, so they cannot compile as live code. Presumably they sit
// inside a disabled region whose delimiters are not visible in this listing
// — confirm before enabling them.
380 assert(sizeof(rate_tab) == tab_size * sizeof(rate_tab[0]);
381 assert(sizeof(dist_tab) == tab_size * sizeof(dist_tab[0]);
382 assert(sizeof(rate_tab) == sizeof(dist_tab));
// Interpolate both tables at the same position.
385 linear_interpolate2(x, tab_size, inv_tab_step,
386 rate_tab, dist_tab, R, D);
// Models rate and distortion for `n` samples with total variance/SSE `var`
// quantized with step `qstep`.
//   s2    = var / n                 (per-sample variance)
//   x     = qstep / sqrt(s2)        (normalized step size)
//   *rate = (n << 8) * R rounded    (R from model_rd_norm)
//   *dist = var * D rounded         (D from model_rd_norm)
// var == 0 or n == 0 is handled separately to avoid dividing by zero.
// NOTE(review): the body of that special case and the declarations of R and D
// are not visible in this listing.
389 static void model_rd_from_var_lapndz(int var, int n, int qstep,
390 int *rate, int64_t *dist) {
391 // This function models the rate and distortion for a Laplacian
392 // source with given variance when quantized with a uniform quantizer
393 // with given stepsize. The closed form expressions are in:
394 // Hang and Chen, "Source Model for transform video coder and its
395 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
396 // Sys. for Video Tech., April 1997.
// vp9_clear_system_state() is called before and after the floating-point
// work in this function.
397 vp9_clear_system_state();
398 if (var == 0 || n == 0) {
403 double s2 = (double) var / n;
404 double x = qstep / sqrt(s2);
405 model_rd_norm(x, &R, &D);
406 *rate = (int)((n << 8) * R + 0.5);
407 *dist = (int)(var * D + 0.5);
409 vp9_clear_system_state();
412 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
413 MACROBLOCK *x, MACROBLOCKD *xd,
414 int *out_rate_sum, int64_t *out_dist_sum) {
415 // Note our transform coeffs are 8 times an orthogonal transform.
416 // Hence quantizer step is also 8 times. To get effective quantizer
417 // we need to divide by 8 before sending to modeling function.
418 int i, rate_sum = 0, dist_sum = 0;
420 for (i = 0; i < MAX_MB_PLANE; ++i) {
421 struct macroblock_plane *const p = &x->plane[i];
422 struct macroblockd_plane *const pd = &xd->plane[i];
423 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
426 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
427 pd->dst.buf, pd->dst.stride, &x->pred_sse);
428 // sse works better than var, since there is no dc prediction used
429 model_rd_from_var_lapndz(x->pred_sse, 1 << num_pels_log2_lookup[bs],
430 pd->dequant[1] >> 3, &rate, &dist);
433 dist_sum += (int)dist;
436 *out_rate_sum = rate_sum;
437 *out_dist_sum = dist_sum << 4;
440 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
442 MACROBLOCK *x, MACROBLOCKD *xd,
443 int *out_rate_sum, int64_t *out_dist_sum,
447 struct macroblock_plane *const p = &x->plane[0];
448 struct macroblockd_plane *const pd = &xd->plane[0];
449 const int width = 4 << num_4x4_blocks_wide_lookup[bsize];
450 const int height = 4 << num_4x4_blocks_high_lookup[bsize];
452 int64_t dist_sum = 0;
453 const int t = 4 << tx_size;
455 if (tx_size == TX_4X4) {
457 } else if (tx_size == TX_8X8) {
459 } else if (tx_size == TX_16X16) {
461 } else if (tx_size == TX_32X32) {
468 for (j = 0; j < height; j += t) {
469 for (k = 0; k < width; k += t) {
473 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
474 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
476 // sse works better than var, since there is no dc prediction used
477 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
480 *out_skip &= (rate < 1024);
484 *out_rate_sum = rate_sum;
485 *out_dist_sum = dist_sum << 4;
// Accumulates, over `block_size` coefficients, the squared difference between
// coeff[] and dqcoeff[] (into `error`) and the squared value of coeff[] (into
// `sqcoeff`).
// The casts to unsigned make the 32-bit multiplications unsigned, so a large
// difference squared does not overflow a signed int before it is added to the
// 64-bit accumulator.
// NOTE(review): the declaration of i, the store through `ssz` and the return
// statement are not visible in this listing; presumably *ssz receives sqcoeff
// and error is returned — confirm.
488 int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
489 intptr_t block_size, int64_t *ssz) {
491 int64_t error = 0, sqcoeff = 0;
493 for (i = 0; i < block_size; i++) {
494 int this_diff = coeff[i] - dqcoeff[i];
495 error += (unsigned)this_diff * this_diff;
496 sqcoeff += (unsigned) coeff[i] * coeff[i];
// Number of coefficients in each coefficient band, one row per transform
// size. The non-zero counts of a row add up to the coefficient count of that
// transform (16, 64, 256, 1024). cost_coeffs() starts reading a row at
// element [1].
// NOTE(review): the end of the block comment below is not visible in this
// listing.
503 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
504 * decide whether to include cost of a trailing EOB node or not (i.e. we
505 * can skip this if the last coefficient in this transform block, e.g. the
506 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
508 static const int16_t band_counts[TX_SIZES][8] = {
509 { 1, 2, 3, 4, 3, 16 - 13, 0 },
510 { 1, 2, 3, 4, 11, 64 - 21, 0 },
511 { 1, 2, 3, 4, 11, 256 - 21, 0 },
512 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
// Computes the token cost of the quantized coefficients of one transform
// block (`block` in `plane`), using the cost tables in x->token_costs.
//   A, L  - above/left entropy contexts of the block; only their zero /
//           non-zero state is used to form the initial context
//   scan  - scan order mapping coefficient position c to raster index
//   nb    - neighbor table passed to get_coef_context()
// While walking the coefficients, x->token_cache is filled with the energy
// class of each token for later context derivation.
// NOTE(review): the tx_size parameter line, several declarations, the band
// bookkeeping, the updates of *A / *L and the return statement are not
// visible in this listing.
515 static INLINE int cost_coeffs(MACROBLOCK *x,
516 int plane, int block,
517 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
519 const int16_t *scan, const int16_t *nb) {
520 MACROBLOCKD *const xd = &x->e_mbd;
521 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
522 struct macroblock_plane *p = &x->plane[plane];
523 struct macroblockd_plane *pd = &xd->plane[plane];
524 const PLANE_TYPE type = pd->plane_type;
// Skip element [0] of the band table row; the first coefficient is costed
// separately below.
525 const int16_t *band_count = &band_counts[tx_size][1];
526 const int eob = p->eobs[block];
527 const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
// ref is 1 when the first reference frame is not INTRA_FRAME, 0 otherwise.
528 const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
529 unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
530 x->token_costs[tx_size][type][ref];
531 const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
532 uint8_t *p_tok = x->token_cache;
533 int pt = combine_entropy_contexts(above_ec, left_ec);
536 // Check for consistency of tx_size with mode info
537 assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
538 : get_uv_tx_size(mbmi) == tx_size);
// Cost of an immediate EOB (used when the block has no coefficients;
// the guarding condition is not visible in this listing).
542 cost = token_costs[0][0][pt][EOB_TOKEN];
545 int band_left = *band_count++;
// First (DC) coefficient: costed with the initial context pt.
548 int v = qcoeff_ptr[0];
549 int prev_t = vp9_dct_value_tokens_ptr[v].token;
550 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
551 p_tok[0] = vp9_pt_energy_class[prev_t];
// Remaining coefficients in scan order; the first table index is
// !prev_t, i.e. 1 when the previous token value was 0.
555 for (c = 1; c < eob; c++) {
556 const int rc = scan[c];
560 t = vp9_dct_value_tokens_ptr[v].token;
561 pt = get_coef_context(nb, p_tok, c);
562 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
563 p_tok[rc] = vp9_pt_energy_class[t];
566 band_left = *band_count++;
// Trailing EOB token cost.
573 pt = get_coef_context(nb, p_tok, c);
574 cost += (*token_costs)[0][pt][EOB_TOKEN];
578 // is eob first coefficient;
// Computes the distortion and SSE of one transform block and stores them in
// the rdcost_block_args passed through `arg` (args->dist, args->sse).
// The block error is taken over 16 << (2 * tx_size) coefficients. The SSE is
// shifted right by `shift`, which is 0 for TX_32X32 and 2 otherwise.
// NOTE(review): the declaration of this_sse, the last arguments of the
// vp9_block_error() call and any shift applied to its result are not visible
// in this listing.
584 static void dist_block(int plane, int block, TX_SIZE tx_size, void *arg) {
585 const int ss_txfrm_size = tx_size << 1;
586 struct rdcost_block_args* args = arg;
587 MACROBLOCK* const x = args->x;
588 MACROBLOCKD* const xd = &x->e_mbd;
589 struct macroblock_plane *const p = &x->plane[plane];
590 struct macroblockd_plane *const pd = &xd->plane[plane];
592 int shift = args->tx_size == TX_32X32 ? 0 : 2;
593 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
594 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
595 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
597 args->sse = this_sse >> shift;
// With skip_encode set on a non-inter block, add an extra distortion term
// derived from the AC dequantizer step (pd->dequant[1]) and the block size.
599 if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
600 // TODO(jingning): tune the model to better capture the distortion.
601 int64_t p = (pd->dequant[1] * pd->dequant[1] *
602 (1 << ss_txfrm_size)) >> (shift + 2);
603 args->dist += (p >> 4);
// Computes the coefficient rate of one transform block and stores it in
// args->rate. The block index is converted to its x/y position inside the
// plane block so that the matching entries of the temporary above/left
// entropy context arrays are handed to cost_coeffs().
// NOTE(review): the declarations of x_idx and y_idx are not visible in this
// listing.
608 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
609 TX_SIZE tx_size, void *arg) {
610 struct rdcost_block_args* args = arg;
613 txfrm_block_to_raster_xy(plane_bsize, args->tx_size, block, &x_idx, &y_idx);
615 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
616 args->t_left + y_idx, args->tx_size,
617 args->scan, args->nb);
// Per-transform-block callback used by txfm_rd_in_plane(): transforms and
// quantizes one block, measures its rate and distortion, and accumulates the
// results in the rdcost_block_args passed through `arg`.
// NOTE(review): the early-exit check at the top, the `else` between the two
// encode calls, the update of args->this_rd and the body of the final `if`
// are not visible in this listing.
620 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
621 TX_SIZE tx_size, void *arg) {
622 struct rdcost_block_args *args = arg;
623 MACROBLOCK *const x = args->x;
624 MACROBLOCKD *const xd = &x->e_mbd;
625 struct encode_b_args encode_args = {x, NULL};
626 int64_t rd1, rd2, rd;
// Non-inter blocks go through vp9_encode_block_intra(); the other encode
// path is vp9_xform_quant().
631 if (!is_inter_block(&xd->mi_8x8[0]->mbmi))
632 vp9_encode_block_intra(plane, block, plane_bsize, tx_size, &encode_args);
634 vp9_xform_quant(plane, block, plane_bsize, tx_size, &encode_args);
636 dist_block(plane, block, tx_size, args);
637 rate_block(plane, block, plane_bsize, tx_size, args);
// rd1: cost of coding the coefficients; rd2: cost of sending none (rate 0,
// distortion equal to the SSE).
638 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
639 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
641 // TODO(jingning): temporarily enabled only for luma component
// Mark the block as zero-coefficient when it has no coefficients, or when
// dropping them is cheaper and the frame is not lossless.
644 x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
645 (rd1 > rd2 && !xd->lossless);
647 args->this_rate += args->rate;
648 args->this_dist += args->dist;
649 args->this_sse += args->sse;
// Compare the accumulated cost against the best cost supplied by the caller.
652 if (args->this_rd > args->best_rd) {
// Builds temporary above/left entropy context arrays (t_above, t_left) for a
// given transform size from the plane's context arrays (above, left).
// For one transform size the contexts are copied unchanged. For the others,
// 2, 4 or 8 neighbouring contexts are read at once through a wider integer
// type and collapsed to a single 0/1 flag stored at the first index of each
// group; the other entries of the group are not written.
// NOTE(review): the switch statement and its case labels are not visible in
// this listing. Presumably the copy is TX_4X4 and the 2/4/8 groups are
// TX_8X8 / TX_16X16 / TX_32X32 — confirm.
// NOTE(review): the wide loads cast ENTROPY_CONTEXT pointers to
// uint16_t/uint32_t/uint64_t pointers, which relies on suitable alignment and
// on the compiler tolerating the type-punned access — confirm for all targets.
658 void vp9_get_entropy_contexts(TX_SIZE tx_size,
659 ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
660 const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
661 int num_4x4_w, int num_4x4_h) {
665 vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
666 vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
// Groups of two contexts.
669 for (i = 0; i < num_4x4_w; i += 2)
670 t_above[i] = !!*(const uint16_t *)&above[i];
671 for (i = 0; i < num_4x4_h; i += 2)
672 t_left[i] = !!*(const uint16_t *)&left[i];
// Groups of four contexts.
675 for (i = 0; i < num_4x4_w; i += 4)
676 t_above[i] = !!*(const uint32_t *)&above[i];
677 for (i = 0; i < num_4x4_h; i += 4)
678 t_left[i] = !!*(const uint32_t *)&left[i];
// Groups of eight contexts.
681 for (i = 0; i < num_4x4_w; i += 8)
682 t_above[i] = !!*(const uint64_t *)&above[i];
683 for (i = 0; i < num_4x4_h; i += 8)
684 t_left[i] = !!*(const uint64_t *)&left[i];
687 assert(0 && "Invalid transform size.");
// Resets `arg` to all zeroes and then stores the transform size and the
// reference (best so far) RD cost in it.
// NOTE(review): further assignments (e.g. of x) are not visible in this
// listing; num_4x4_w and num_4x4_h are not used by any visible line.
691 static void init_rdcost_stack(MACROBLOCK *x, TX_SIZE tx_size,
692 const int num_4x4_w, const int num_4x4_h,
693 const int64_t ref_rdcost,
694 struct rdcost_block_args *arg) {
695 vpx_memset(arg, 0, sizeof(struct rdcost_block_args));
697 arg->tx_size = tx_size;
700 arg->best_rd = ref_rdcost;
// Computes rate, distortion, SSE and skippability of one plane of a block
// when coded with transform size `tx_size`.
//   rd_stack    - scratch rdcost_block_args, re-initialized on every call
//   ref_best_rd - best RD cost so far, stored as rd_stack->best_rd
// Side effect: writes tx_size into the current block's mbmi.tx_size.
// If the per-block callback sets rd_stack->skip, *distortion is set to
// INT64_MAX; otherwise the outputs are copied from the accumulated totals.
// NOTE(review): the remaining outputs of the rd_stack->skip branch and the
// `else` line are not visible in this listing.
703 static void txfm_rd_in_plane(MACROBLOCK *x,
704 struct rdcost_block_args *rd_stack,
705 int *rate, int64_t *distortion,
706 int *skippable, int64_t *sse,
707 int64_t ref_best_rd, int plane,
708 BLOCK_SIZE bsize, TX_SIZE tx_size) {
709 MACROBLOCKD *const xd = &x->e_mbd;
710 struct macroblockd_plane *const pd = &xd->plane[plane];
711 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
712 const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
713 const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
714 const scan_order *so;
716 init_rdcost_stack(x, tx_size, num_4x4_w, num_4x4_h,
717 ref_best_rd, rd_stack);
719 xd->mi_8x8[0]->mbmi.tx_size = tx_size;
// Work on copies of the entropy contexts so the plane's own arrays are not
// passed to the costing code.
721 vp9_get_entropy_contexts(tx_size, rd_stack->t_above, rd_stack->t_left,
722 pd->above_context, pd->left_context,
723 num_4x4_w, num_4x4_h);
725 so = get_scan(xd, tx_size, pd->plane_type, 0);
726 rd_stack->scan = so->scan;
727 rd_stack->nb = so->neighbors;
// Run block_rd_txfm() on every transform block of the plane.
729 foreach_transformed_block_in_plane(xd, bsize, plane,
730 block_rd_txfm, rd_stack);
731 if (rd_stack->skip) {
733 *distortion = INT64_MAX;
737 *distortion = rd_stack->this_dist;
738 *rate = rd_stack->this_rate;
739 *sse = rd_stack->this_sse;
740 *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
// Selects the largest transform size allowed for block size `bs`, i.e. the
// smaller of the block's maximum and the maximum permitted by cm->tx_mode.
// It stores that size in mbmi->tx_size and evaluates the luma plane with it.
// The SSE is written to sse[mbmi->tx_size]. Also increments
// cpi->tx_stepdown_count[0].
// NOTE(review): the trailing parameters (ref_best_rd, bs) and the last
// argument of the txfm_rd_in_plane() call are not visible in this listing.
744 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
745 int *rate, int64_t *distortion,
746 int *skip, int64_t *sse,
749 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
750 VP9_COMMON *const cm = &cpi->common;
751 const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
752 MACROBLOCKD *const xd = &x->e_mbd;
753 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
755 mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
757 txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
758 &sse[mbmi->tx_size], ref_best_rd, 0, bs,
760 cpi->tx_stepdown_count[0]++;
// Chooses the luma transform size from per-size rate/distortion results that
// the caller has already computed.
//   r[n][0]  - rate for transform size n without the size-signalling cost
//   r[n][1]  - rate including the cost of signalling size n (built here)
//   d[n]     - distortion for size n (INT64_MAX marks an unusable size)
//   s[n]     - skip flag for size n
//   tx_cache - receives the RD cost for each TX_MODE
// Outputs *rate, *distortion and *skip for the chosen size and stores the
// size in mbmi->tx_size. Also updates cpi->tx_stepdown_count[].
// NOTE(review): the `int *s, int *skip` and `bs` parameter lines, several
// declarations (n, m, s0, s1), the initialisation of r[n][1], some conditions
// and the best_tx/best_rd updates are not visible in this listing.
763 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
764 int (*r)[2], int *rate,
765 int64_t *d, int64_t *distortion,
767 int64_t tx_cache[TX_MODES],
769 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
770 VP9_COMMON *const cm = &cpi->common;
771 MACROBLOCKD *const xd = &x->e_mbd;
772 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
773 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
// rd[n][0]: cost without the tx-size signalling bits; rd[n][1]: with them.
774 int64_t rd[TX_SIZES][2];
777 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
778 int64_t best_rd = INT64_MAX;
779 TX_SIZE best_tx = TX_4X4;
781 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
782 assert(skip_prob > 0);
// s0 / s1: cost of coding the skip flag as 0 / 1.
783 s0 = vp9_cost_bit(skip_prob, 0);
784 s1 = vp9_cost_bit(skip_prob, 1);
786 for (n = TX_4X4; n <= max_tx_size; n++) {
788 if (r[n][0] < INT_MAX) {
// Add the cost of each tx-size signalling bit; the largest size needs
// one bit fewer.
789 for (m = 0; m <= n - (n == max_tx_size); m++) {
791 r[n][1] += vp9_cost_zero(tx_probs[m]);
793 r[n][1] += vp9_cost_one(tx_probs[m]);
796 if (d[n] == INT64_MAX) {
797 rd[n][0] = rd[n][1] = INT64_MAX;
// Skipped block: only the skip flag is paid for.
799 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
801 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
802 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
805 if (rd[n][1] < best_rd) {
// With TX_MODE_SELECT the best size found is used; otherwise the largest
// size permitted by both the block and the frame's tx_mode.
810 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
811 best_tx : MIN(max_tx_size, max_mode_tx_size);
814 *distortion = d[mbmi->tx_size];
815 *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
816 *skip = s[mbmi->tx_size];
// Per-mode RD costs; modes larger than the block allows are clamped to
// max_tx_size.
// NOTE(review): rd[TX_8X8] is read unconditionally, so max_tx_size is
// assumed to be at least TX_8X8 here — confirm against the callers.
818 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
819 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
820 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
821 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
// tx_stepdown_count is indexed by how many sizes below max_tx_size the
// selected size lies.
823 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
824 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
825 cpi->tx_stepdown_count[0]++;
826 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
827 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
828 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
829 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
830 tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
831 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
833 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
834 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
// Chooses the luma transform size from MODELLED per-size rate/distortion
// values (r, d, s), then encodes the luma plane once with the chosen size via
// txfm_rd_in_plane() to obtain the real *rate, *distortion, *skip and
// sse[tx_size]. The r and d arrays are not updated with the real values.
// Each size's modelled RD cost is multiplied by scale_rd[n], which weights
// smaller transforms more heavily (1.73 for index 0 down to 1.00 for index 3).
// Also updates cpi->tx_stepdown_count[].
// NOTE(review): the trailing parameter lines (ref_best_rd, bs), several
// declarations (n, m, s0, s1), the initialisation of r[n][1], some conditions
// and the best_tx/best_rd updates are not visible in this listing.
838 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
839 int (*r)[2], int *rate,
840 int64_t *d, int64_t *distortion,
841 int *s, int *skip, int64_t *sse,
844 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
845 VP9_COMMON *const cm = &cpi->common;
846 MACROBLOCKD *const xd = &x->e_mbd;
847 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
848 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
849 int64_t rd[TX_SIZES][2];
852 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
853 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
854 int64_t best_rd = INT64_MAX;
855 TX_SIZE best_tx = TX_4X4;
857 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
858 assert(skip_prob > 0);
// s0 / s1: cost of coding the skip flag as 0 / 1.
859 s0 = vp9_cost_bit(skip_prob, 0);
860 s1 = vp9_cost_bit(skip_prob, 1);
862 for (n = TX_4X4; n <= max_tx_size; n++) {
863 double scale = scale_rd[n];
// Add the cost of each tx-size signalling bit; the largest size needs one
// bit fewer.
865 for (m = 0; m <= n - (n == max_tx_size); m++) {
867 r[n][1] += vp9_cost_zero(tx_probs[m]);
869 r[n][1] += vp9_cost_one(tx_probs[m]);
// The RD costs are scaled in double precision and converted back to
// int64_t on assignment.
872 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
874 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
875 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
877 if (rd[n][1] < best_rd) {
883 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
884 best_tx : MIN(max_tx_size, max_mode_tx_size);
886 // Actually encode using the chosen mode if a model was used, but do not
887 // update the r, d costs
888 txfm_rd_in_plane(x, &cpi->rdcost_stack, rate, distortion, skip,
889 &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
// tx_stepdown_count is indexed by how many sizes below max_tx_size the
// selected size lies.
// NOTE(review): the 8x8-vs-4x4 comparison uses <= here but < in
// choose_txfm_size_from_rd() — confirm the difference is intended.
891 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
892 cpi->tx_stepdown_count[0]++;
893 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
894 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
895 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
896 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
898 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
// Computes the luma rate, distortion, skip flag and SSE of a block of size
// `bs`, choosing the transform size according to
// cpi->sf.tx_size_search_method:
//   - largest-size path: choose_largest_txfm_size(), with txfm_cache zeroed;
//   - model path (USE_LARGESTINTRA_MODELINTER): model every size with
//     model_rd_for_sb_y_tx(), then choose_txfm_size_from_modelrd();
//   - full path: evaluate every size with txfm_rd_in_plane(), then
//     choose_txfm_size_from_rd().
// *psse receives sse[] of the transform size finally stored in mbmi->tx_size.
// NOTE(review): the second halves of the two search-method conditions, the
// declaration of tx_size, the `else` line and the return after the first path
// are not visible in this listing.
902 static void super_block_yrd(VP9_COMP *cpi,
903 MACROBLOCK *x, int *rate, int64_t *distortion,
904 int *skip, int64_t *psse, BLOCK_SIZE bs,
905 int64_t txfm_cache[TX_MODES],
906 int64_t ref_best_rd) {
// Per-transform-size results: rate (without / with size signalling), skip
// flag, distortion and SSE.
907 int r[TX_SIZES][2], s[TX_SIZES];
908 int64_t d[TX_SIZES], sse[TX_SIZES];
909 MACROBLOCKD *xd = &x->e_mbd;
910 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
911 struct rdcost_block_args *rdcost_stack = &cpi->rdcost_stack;
912 const int b_inter_mode = is_inter_block(mbmi);
913 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
917 assert(bs == mbmi->sb_type);
// Compute the luma residual for the block before any transform size is
// evaluated.
919 vp9_subtract_sby(x, bs);
921 if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
922 (cpi->sf.tx_size_search_method != USE_FULL_RD &&
924 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
925 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
928 *psse = sse[mbmi->tx_size];
932 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
934 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
935 model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
936 &r[tx_size][0], &d[tx_size], &s[tx_size]);
937 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
938 skip, sse, ref_best_rd, bs);
940 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
941 txfm_rd_in_plane(x, rdcost_stack, &r[tx_size][0], &d[tx_size],
942 &s[tx_size], &sse[tx_size],
943 ref_best_rd, 0, bs, tx_size);
944 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
945 skip, txfm_cache, bs);
948 *psse = sse[mbmi->tx_size];
// Tests whether an oblique intra mode is unrelated to the best intra mode
// found so far. Each condition pairs one oblique mode with the two modes it
// is allowed to follow:
//   D117_PRED - V_PRED or D135_PRED
//   D63_PRED  - V_PRED or D45_PRED
//   D207_PRED - H_PRED or D45_PRED
//   D153_PRED - H_PRED or D135_PRED
// NOTE(review): the return statements are not visible in this listing;
// presumably a matching condition returns non-zero ("skip this mode") and the
// fall-through returns 0 — confirm. The caller skips the mode when the result
// is non-zero.
951 static int conditional_skipintra(MB_PREDICTION_MODE mode,
952 MB_PREDICTION_MODE best_intra_mode) {
953 if (mode == D117_PRED &&
954 best_intra_mode != V_PRED &&
955 best_intra_mode != D135_PRED)
957 if (mode == D63_PRED &&
958 best_intra_mode != V_PRED &&
959 best_intra_mode != D45_PRED)
961 if (mode == D207_PRED &&
962 best_intra_mode != H_PRED &&
963 best_intra_mode != D45_PRED)
965 if (mode == D153_PRED &&
966 best_intra_mode != H_PRED &&
967 best_intra_mode != D135_PRED)
// Searches the intra prediction modes DC_PRED..TM_PRED for the sub-8x8 luma
// partition starting at 4x4 block `ib` and keeps the one with the lowest RD
// cost below `rd_thresh`.
//   best_mode      - in: best mode so far (used by the direction-mismatch
//                    skip); presumably also receives the winner — the
//                    assignment is not visible in this listing
//   a, l           - above/left entropy contexts; overwritten with the
//                    contexts produced by the winning mode
//   bestrate, bestratey, bestdistortion - outputs for the winning mode
//   bsize          - partition size; determines how many 4x4 blocks are coded
// For every candidate mode each 4x4 block is predicted, transformed,
// quantized and costed. Reconstruction into the destination buffer is done
// with the inverse transform so later blocks predict from reconstructed
// samples. The reconstruction of the best mode is saved in best_dst and
// copied back at the end.
// NOTE(review): the bmode_costs parameter line, several declarations, the
// `continue`/`goto` targets, the `else` lines and the return statements are
// not visible in this listing.
972 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
973 MB_PREDICTION_MODE *best_mode,
975 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
976 int *bestrate, int *bestratey,
977 int64_t *bestdistortion,
978 BLOCK_SIZE bsize, int64_t rd_thresh) {
979 MB_PREDICTION_MODE mode;
980 MACROBLOCKD *xd = &x->e_mbd;
// A mode is accepted only if it beats the threshold supplied by the caller.
981 int64_t best_rd = rd_thresh;
984 struct macroblock_plane *p = &x->plane[0];
985 struct macroblockd_plane *pd = &xd->plane[0];
986 const int src_stride = p->src.stride;
987 const int dst_stride = pd->dst.stride;
988 const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
990 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
992 int16_t *src_diff, *coeff;
// ta/tl: contexts on entry; tempa/templ: working copies for one mode.
994 ENTROPY_CONTEXT ta[2], tempa[2];
995 ENTROPY_CONTEXT tl[2], templ[2];
997 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
998 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
// Reconstruction of the best mode so far, stored with a stride of 8.
1000 uint8_t best_dst[8 * 8];
1004 vpx_memcpy(ta, a, sizeof(ta));
1005 vpx_memcpy(tl, l, sizeof(tl));
1006 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
1008 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
// Modes disabled by the speed-feature mask for TX_4X4 are not tried.
1012 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1015 // Only do the oblique modes if the best so far is
1016 // one of the neighboring directional modes
1017 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1018 if (conditional_skipintra(mode, *best_mode))
1022 rate = bmode_costs[mode];
// Every mode starts from the entry contexts.
1025 vpx_memcpy(tempa, ta, sizeof(ta));
1026 vpx_memcpy(templ, tl, sizeof(tl));
1028 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1029 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1031 const scan_order *so;
1032 const uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
1033 uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
1034 const int block = ib + idy * 2 + idx;
1036 xd->mi_8x8[0]->bmi[block].as_mode = mode;
1037 src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1038 coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
// With skip_encode set, the prediction reference is the source buffer
// instead of the destination buffer.
1039 vp9_predict_intra_block(xd, block, 1,
1041 x->skip_encode ? src : dst,
1042 x->skip_encode ? src_stride : dst_stride,
1043 dst, dst_stride, idx, idy, 0);
1044 vp9_subtract_block(4, 4, src_diff, 8,
// The transform type of the block selects both the scan order and the
// forward transform.
1048 tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1049 so = &vp9_scan_orders[TX_4X4][tx_type];
1051 if (tx_type != DCT_DCT)
1052 vp9_short_fht4x4(src_diff, coeff, 8, tx_type);
1054 x->fwd_txm4x4(src_diff, coeff, 8);
1056 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1058 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1059 so->scan, so->neighbors);
1060 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
// Give up on this mode as soon as its partial cost reaches the best.
1062 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
// Reconstruct into dst with the matching inverse transform.
1065 if (tx_type != DCT_DCT)
1066 vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
1067 dst, pd->dst.stride, tx_type);
1069 xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
1075 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1077 if (this_rd < best_rd) {
1080 *bestdistortion = distortion;
// Publish the contexts of the new best mode and save its reconstruction.
1083 vpx_memcpy(a, tempa, sizeof(tempa));
1084 vpx_memcpy(l, templ, sizeof(templ));
1085 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1086 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1087 num_4x4_blocks_wide * 4);
// No mode beat the threshold, or reconstruction is not needed.
1093 if (best_rd >= rd_thresh || x->skip_encode)
// Restore the reconstruction of the best mode into the destination buffer.
1096 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1097 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1098 num_4x4_blocks_wide * 4);
// Chooses an intra prediction mode for every sub-block of the current 8x8
// block (xd->mi_8x8[0]). Each sub-block is searched with
// rd_pick_intra4x4block() using the rd budget that is still left
// (best_rd - total_rd). On the final path *rate_y receives tot_rate_y,
// *distortion receives the summed distortion, the block level mode is taken
// from bmi[3], and RDCOST(cost, total_distortion) is returned.
1103 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
1104 MACROBLOCK * const mb,
1107 int64_t * const distortion,
1110 MACROBLOCKD *const xd = &mb->e_mbd;
1111 MODE_INFO *const mic = xd->mi_8x8[0];
1112 const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
1113 const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
1114 const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
1115 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1116 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1119 int64_t total_distortion = 0;
1121 int64_t total_rd = 0;
1122 ENTROPY_CONTEXT t_above[4], t_left[4];
// Work on local copies of the plane 0 above/left entropy contexts.
1125 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1126 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
// Default mode cost table; replaced per sub-block on key frames below.
1128 bmode_costs = mb->mbmode_cost;
1130 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1131 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1132 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1133 MB_PREDICTION_MODE best_mode = DC_PRED;
1134 int r = INT_MAX, ry = INT_MAX;
1135 int64_t d = INT64_MAX, this_rd = INT64_MAX;
// On key frames the cost table is indexed by the modes of the above and
// left neighbours of sub-block i.
1137 if (cpi->common.frame_type == KEY_FRAME) {
1138 const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, i);
1139 const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, i);
1141 bmode_costs = mb->y_mode_costs[A][L];
1144 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1145 t_above + idx, t_left + idy, &r, &ry, &d,
1146 bsize, best_rd - total_rd);
// The sub-block alone already uses up the remaining budget.
1147 if (this_rd >= best_rd - total_rd)
1150 total_rd += this_rd;
1152 total_distortion += d;
// Store the winning mode and replicate it over the other 4x4 units that
// the sub-block covers (index + 2 is one row down, index + 1 one column
// to the right).
1155 mic->bmi[i].as_mode = best_mode;
1156 for (j = 1; j < num_4x4_blocks_high; ++j)
1157 mic->bmi[i + j * 2].as_mode = best_mode;
1158 for (j = 1; j < num_4x4_blocks_wide; ++j)
1159 mic->bmi[i + j].as_mode = best_mode;
// Running total reached the caller supplied bound.
1161 if (total_rd >= best_rd)
1167 *rate_y = tot_rate_y;
1168 *distortion = total_distortion;
// The block level mode is the mode stored at bmi index 3.
1169 mic->mbmi.mode = mic->bmi[3].as_mode;
1171 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
// Searches the luma intra prediction modes DC_PRED..TM_PRED for the current
// block. Each allowed mode is evaluated with super_block_yrd(); when a mode
// beats best_rd its mode, transform size, *rate_tokenonly and *distortion
// are recorded. After the loop the selected mode and transform size are
// written back to mic->mbmi. With USE_FULL_RD, tx_cache[] keeps the lowest
// adjusted rd seen for each transform mode.
1174 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1175 int *rate, int *rate_tokenonly,
1176 int64_t *distortion, int *skippable,
1178 int64_t tx_cache[TX_MODES],
1180 MB_PREDICTION_MODE mode;
1181 MB_PREDICTION_MODE mode_selected = DC_PRED;
1182 MACROBLOCKD *const xd = &x->e_mbd;
1183 MODE_INFO *const mic = xd->mi_8x8[0];
1184 int this_rate, this_rate_tokenonly, s;
1185 int64_t this_distortion, this_rd;
1186 TX_SIZE best_tx = TX_4X4;
1188 int *bmode_costs = x->mbmode_cost;
// Start every transform mode entry at the worst possible value.
1190 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1191 for (i = 0; i < TX_MODES; i++)
1192 tx_cache[i] = INT64_MAX;
1194 /* Y Search for intra prediction mode */
1195 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1196 int64_t local_tx_cache[TX_MODES];
1197 MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
1198 MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
// Speed feature mask: tests whether this mode is enabled for the largest
// transform size of bsize.
1200 if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
// Key frames pick the cost table from the above/left neighbour modes.
1203 if (cpi->common.frame_type == KEY_FRAME) {
1204 const MB_PREDICTION_MODE A = above_block_mode(mic, above_mi, 0);
1205 const MB_PREDICTION_MODE L = left_block_mode(mic, left_mi, 0);
1207 bmode_costs = x->y_mode_costs[A][L];
1209 mic->mbmi.mode = mode;
1211 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1212 bsize, local_tx_cache, best_rd);
// A token rate of INT_MAX is tested here; presumably it marks a mode that
// could not be evaluated within best_rd (TODO confirm).
1214 if (this_rate_tokenonly == INT_MAX)
1217 this_rate = this_rate_tokenonly + bmode_costs[mode];
1218 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1220 if (this_rd < best_rd) {
1221 mode_selected = mode;
1223 best_tx = mic->mbmi.tx_size;
1225 *rate_tokenonly = this_rate_tokenonly;
1226 *distortion = this_distortion;
// Re-base this_rd to each transform mode by the difference between that
// mode and the frame tx_mode in local_tx_cache, keeping the minimum.
1230 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1231 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1232 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1233 local_tx_cache[cpi->common.tx_mode];
1234 if (adj_rd < tx_cache[i]) {
1235 tx_cache[i] = adj_rd;
// Leave the block state set to the winner of the search.
1241 mic->mbmi.mode = mode_selected;
1242 mic->mbmi.tx_size = best_tx;
// Accumulates rate-distortion results for the chroma planes (plane indices
// 1 .. MAX_MB_PLANE - 1) of a block at the uv transform size derived from
// mbmi. Each plane is evaluated with txfm_rd_in_plane(); plane distortion is
// summed into *distortion and the plane skip flag is ANDed into *skippable.
// ref_best_rd is the rd bound handed to txfm_rd_in_plane().
1247 static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
1248 int *rate, int64_t *distortion, int *skippable,
1249 int64_t *sse, BLOCK_SIZE bsize,
1250 int64_t ref_best_rd) {
1251 MACROBLOCKD *const xd = &x->e_mbd;
1252 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
1253 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1255 int pnrate = 0, pnskip = 1;
1256 int64_t pndist = 0, pnsse = 0;
// A negative bound is special-cased.
1258 if (ref_best_rd < 0)
// Inter blocks need the chroma residual computed first.
1261 if (is_inter_block(mbmi))
1262 vp9_subtract_sbuv(x, bsize);
1269 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1270 txfm_rd_in_plane(x, &cpi->rdcost_stack, &pnrate, &pndist, &pnskip, &pnsse,
1271 ref_best_rd, plane, bsize, uv_txfm_size);
// A plane rate of INT_MAX is tested before the plane is accumulated.
1272 if (pnrate == INT_MAX)
1275 *distortion += pndist;
1277 *skippable &= pnskip;
// NOTE(review): presumably part of the failure path that reports invalid
// results to the caller; confirm against the full function.
1283 *distortion = INT64_MAX;
// Searches the chroma intra prediction modes DC_PRED..TM_PRED. Each allowed
// mode is evaluated with super_block_uvrd(); the mode with the lowest rd
// cost is remembered together with its *rate_tokenonly and *distortion and
// is finally written to mbmi.uv_mode. When x->select_txfm_size is zero, the
// chroma coefficient buffers of the winner are preserved by rotating the
// buffer pointers held in ctx.
1289 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1290 PICK_MODE_CONTEXT *ctx,
1291 int *rate, int *rate_tokenonly,
1292 int64_t *distortion, int *skippable,
1293 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
1294 MB_PREDICTION_MODE mode;
1295 MB_PREDICTION_MODE mode_selected = DC_PRED;
1296 int64_t best_rd = INT64_MAX, this_rd;
1297 int this_rate_tokenonly, this_rate, s;
1298 int64_t this_distortion, this_sse;
1300 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
// Speed feature mask: tests whether this mode is enabled for max_tx_size.
1301 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
1304 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
1306 super_block_uvrd(cpi, x, &this_rate_tokenonly,
1307 &this_distortion, &s, &this_sse, bsize, best_rd);
1308 if (this_rate_tokenonly == INT_MAX)
// Total rate = token rate + cost of signalling the uv mode for this
// frame type.
1310 this_rate = this_rate_tokenonly +
1311 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1312 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1314 if (this_rd < best_rd) {
1315 mode_selected = mode;
1318 *rate_tokenonly = this_rate_tokenonly;
1319 *distortion = this_distortion;
1321 if (!x->select_txfm_size) {
1323 struct macroblock_plane *const p = x->plane;
1324 struct macroblockd_plane *const pd = x->e_mbd.plane;
// Three-step pointer swap per chroma plane: buffer slot 2 becomes the
// working buffer, slot 0 moves to slot 2, and the buffers that were
// working up to here (assigned last) end up in slot 0.
1325 for (i = 1; i < MAX_MB_PLANE; ++i) {
1326 p[i].coeff = ctx->coeff_pbuf[i][2];
1327 p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1328 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1329 p[i].eobs = ctx->eobs_pbuf[i][2];
1331 ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
1332 ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
1333 ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
1334 ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
1336 ctx->coeff_pbuf[i][0] = p[i].coeff;
1337 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
1338 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
1339 ctx->eobs_pbuf[i][0] = p[i].eobs;
// Leave the block state set to the winner of the search.
1345 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
// Evaluates only DC_PRED for the chroma planes: sets mbmi.uv_mode to
// DC_PRED, runs super_block_uvrd() with an unbounded rd limit (INT64_MAX),
// adds the DC_PRED mode signalling cost to obtain *rate, and computes the
// resulting rd cost into this_rd.
1349 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
1350 int *rate, int *rate_tokenonly,
1351 int64_t *distortion, int *skippable,
1356 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
1357 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1358 skippable, &this_sse, bsize, INT64_MAX);
1359 *rate = *rate_tokenonly +
1360 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
1361 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
// Selects the chroma intra mode for the block held in cpi->mb and reports
// it through *mode_uv together with rate, token rate, distortion and skip
// flag. Block sizes below BLOCK_8X8 are evaluated as BLOCK_8X8.
1366 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1367 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1368 int *rate_uv, int *rate_uv_tokenonly,
1369 int64_t *dist_uv, int *skip_uv,
1370 MB_PREDICTION_MODE *mode_uv) {
1371 MACROBLOCK *const x = &cpi->mb;
1373 // Use an estimated rd for uv_intra based on DC_PRED if the
1374 // appropriate speed flag is set.
1375 if (cpi->sf.use_uv_intra_rd_estimate) {
1376 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1377 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1378 // Else do a proper rd search for each possible transform size that may
1379 // be considered in the main rd loop.
1381 rd_pick_intra_sbuv_mode(cpi, x, ctx,
1382 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1383 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
// Both helpers leave their choice in mbmi.uv_mode; hand it to the caller.
1385 *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
// Returns the cost of signalling inter prediction mode `mode`, looked up in
// x->inter_mode_cost[mode_context] for the current block of cpi->mb. The
// lookup is only performed when the SEG_LVL_SKIP feature is inactive for
// the segment of the block.
1388 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1390 MACROBLOCK *const x = &cpi->mb;
1391 MACROBLOCKD *const xd = &x->e_mbd;
1392 const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
1394 // Don't account for mode here if segment skip is enabled.
1395 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
// Only inter modes are valid indices for the table below.
1396 assert(is_inter_mode(mode));
1397 return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
// Stores prediction mode `mb` and the first motion vector `*mv` into the
// mode info of the current block (x->e_mbd.mi_8x8[0]->mbmi).
1403 void vp9_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) {
1404 x->e_mbd.mi_8x8[0]->mbmi.mode = mb;
1405 x->e_mbd.mi_8x8[0]->mbmi.mv[0].as_int = mv->as_int;
// Declaration of joint_motion_search(), which is called further down from
// the sub-8x8 compound NEWMV path of rd_check_segment_txsize().
1408 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1411 int mi_row, int mi_col,
1412 int_mv single_newmv[MAX_REF_FRAMES],
// Applies inter mode `this_mode` to sub-block i of the current block: fills
// *this_mv (and *this_second_mv for compound prediction) with the motion
// vectors implied by the mode, computes the mv bit cost where vectors are
// taken from seg_mvs, and writes vectors and mode into mic->bmi[] for every
// 4x4 unit covered by the sub-block. `cost` is set from cost_mv_ref().
1415 static int labels2mode(MACROBLOCK *x, int i,
1416 MB_PREDICTION_MODE this_mode,
1417 int_mv *this_mv, int_mv *this_second_mv,
1418 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1419 int_mv seg_mvs[MAX_REF_FRAMES],
1420 int_mv *best_ref_mv,
1421 int_mv *second_best_ref_mv,
1422 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1423 MACROBLOCKD *const xd = &x->e_mbd;
1424 MODE_INFO *const mic = xd->mi_8x8[0];
1425 MB_MODE_INFO *mbmi = &mic->mbmi;
1426 int cost = 0, thismvcost = 0;
1428 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1429 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1430 const int has_second_rf = has_second_ref(mbmi);
1432 /* We have to be careful retrieving previously-encoded motion vectors.
1433 Ones from this macroblock have to be pulled from the BLOCKD array
1434 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1435 MB_PREDICTION_MODE m;
1437 // the only time we should do costing for new motion vector or mode
1438 // is when we are on a new label (jbb May 08, 2007)
1439 switch (m = this_mode) {
// Vectors come from the per-label search results in seg_mvs and are
// costed against the reference vectors.
1441 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1442 thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
1443 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1444 if (has_second_rf) {
1445 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1446 thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
1447 &second_best_ref_mv->as_mv,
1448 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
// Vectors come from the NEARESTMV candidates.
1452 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1454 this_second_mv->as_int =
1455 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
// Vectors come from the NEARMV candidates.
1458 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1460 this_second_mv->as_int =
1461 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
// Zero motion.
1464 this_mv->as_int = 0;
1466 this_second_mv->as_int = 0;
// Mode signalling cost under the context of the first reference frame.
1472 cost = cost_mv_ref(cpi, this_mode,
1473 mbmi->mode_context[mbmi->ref_frame[0]]);
1475 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1477 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1479 mic->bmi[i].as_mode = m;
// Replicate entry i over every 4x4 unit that the sub-block spans.
1481 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1482 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1483 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1484 &mic->bmi[i], sizeof(mic->bmi[i]));
// Builds the inter prediction for sub-block i of the luma plane, forms the
// residual, and then transforms, quantizes and costs each 4x4 unit of the
// sub-block. Outputs: *labelyrate (coefficient rate), *distortion and *sse
// (both shifted right by 2). Returns RDCOST(*labelyrate, *distortion). ta
// and tl are the above/left entropy contexts passed to cost_coeffs().
1490 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1495 int64_t *distortion, int64_t *sse,
1496 ENTROPY_CONTEXT *ta,
1497 ENTROPY_CONTEXT *tl,
1498 int mi_row, int mi_col) {
1500 MACROBLOCKD *xd = &x->e_mbd;
1501 struct macroblockd_plane *const pd = &xd->plane[0];
1502 struct macroblock_plane *const p = &x->plane[0];
1503 MODE_INFO *const mi = xd->mi_8x8[0];
1504 const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
// Sub-block dimensions in pixels.
1505 const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1506 const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1509 const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1511 uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1513 int64_t thisdistortion = 0, thissse = 0;
1514 int thisrate = 0, ref;
1515 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1516 const int is_compound = has_second_ref(&mi->mbmi);
// One predictor per reference frame (two when compound). The pixel
// position of sub-block i is derived from i % 2 (column) and i / 2 (row).
1517 for (ref = 0; ref < 1 + is_compound; ++ref) {
1518 const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1519 pd->pre[ref].stride)];
1520 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1521 dst, pd->dst.stride,
1522 &mi->bmi[i].as_mv[ref].as_mv,
1523 xd->scale_factors[ref],
1524 width, height, ref, &xd->subpix, MV_PRECISION_Q3,
1525 mi_col * MI_SIZE + 4 * (i % 2),
1526 mi_row * MI_SIZE + 4 * (i / 2));
// Residual = source - prediction, written into p->src_diff (stride 8).
1529 vp9_subtract_block(height, width,
1530 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1532 dst, pd->dst.stride);
// Walk the 4x4 units of the sub-block; k is the unit index within the
// 8x8 block.
1535 for (idy = 0; idy < height / 4; ++idy) {
1536 for (idx = 0; idx < width / 4; ++idx) {
1537 int64_t ssz, rd, rd1, rd2;
1540 k += (idy * 2 + idx);
1541 coeff = BLOCK_OFFSET(p->coeff, k);
1542 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1544 vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1545 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
// Context column is k & 1, context row is k >> 1.
1548 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1549 so->scan, so->neighbors);
// rd1: cost of coding the coefficients so far; rd2: cost with zero rate
// and the sse as distortion.
1550 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1551 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1558 *distortion = thisdistortion >> 2;
1559 *labelyrate = thisrate;
1560 *sse = thissse >> 2;
1562 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
// Above/left entropy contexts saved per label and mode (accessed below as
// bsi->rdstat[i][mode_idx].ta / .tl).
1573 ENTROPY_CONTEXT ta[2];
1574 ENTROPY_CONTEXT tl[2];
// Reference motion vectors used for mv costing (accessed below as
// bsi->ref_mv and bsi->second_ref_mv).
1578 int_mv *ref_mv, *second_ref_mv;
// Mode chosen for each of the four 4x4 units.
1586 MB_PREDICTION_MODE modes[4];
// Statistics for every (4x4 unit, inter mode) pair.
1587 SEG_RDSTAT rdstat[4][INTER_MODES];
// Accumulates into r whether *mv, with both components shifted right by 3,
// falls outside the range [mv_row_min, mv_row_max] x [mv_col_min,
// mv_col_max] stored in x. Each violated limit ORs a 1 into r.
1591 static INLINE int mv_check_bounds(MACROBLOCK *x, int_mv *mv) {
1593 r |= (mv->as_mv.row >> 3) < x->mv_row_min;
1594 r |= (mv->as_mv.row >> 3) > x->mv_row_max;
1595 r |= (mv->as_mv.col >> 3) < x->mv_col_min;
1596 r |= (mv->as_mv.col >> 3) > x->mv_col_max;
// Advances the luma source pointer and the prediction buffer pointer(s) of
// x so that they address 4x4 unit i inside the current 8x8 block. The
// second prediction buffer is only moved for compound prediction. Callers
// are expected to undo the shift with mi_buf_restore().
1600 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1601 MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
1602 struct macroblock_plane *const p = &x->plane[0];
1603 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1605 p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
// The unshifted prediction buffer must be 8 byte aligned.
1606 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1607 pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1608 pd->pre[0].stride)];
1609 if (has_second_ref(mbmi))
1610 pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1611 pd->pre[1].stride)];
// Restores the luma source buffer and the prediction buffer(s) of x from
// the copies saved by the caller. The second prediction buffer is only
// restored for compound prediction.
1614 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1615 struct buf_2d orig_pre[2]) {
1616 MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
1617 x->plane[0].src = orig_src;
1618 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1619 if (has_second_ref(mbmi))
1620 x->e_mbd.plane[0].pre[1] = orig_pre[1];
// Rate-distortion search over the inter modes NEARESTMV..NEWMV for every
// sub-block (label) of a sub-8x8 partition. Per label and mode the results
// are stored in bsi->rdstat[i][mode_idx]; the best mode per label is applied
// with labels2mode() and its rate, distortion, sse and rd cost are summed.
// On completion bsi->segment_yrate, bsi->segment_rd, bsi->sse and
// bsi->modes[] describe the chosen segmentation. bsi is entry filter_idx of
// bsi_buf; entries 0 and 1 of bsi_buf may be reused as reference results
// when filter_idx > 0. seg_mvs caches NEWMV search results per label and
// reference frame (INVALID_MV marks an entry without a result).
1623 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1624 const TileInfo *const tile,
1625 BEST_SEG_INFO *bsi_buf, int filter_idx,
1626 int_mv seg_mvs[4][MAX_REF_FRAMES],
1627 int mi_row, int mi_col) {
1628 int i, br = 0, idx, idy;
1629 int64_t bd = 0, block_sse = 0;
1630 MB_PREDICTION_MODE this_mode;
1631 MODE_INFO *mi = x->e_mbd.mi_8x8[0];
1632 MB_MODE_INFO *const mbmi = &mi->mbmi;
1633 struct macroblock_plane *const p = &x->plane[0];
1634 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1635 const int label_count = 4;
1636 int64_t this_segment_rd = 0;
1637 int label_mv_thresh;
1638 int segmentyrate = 0;
1639 const BLOCK_SIZE bsize = mbmi->sb_type;
1640 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1641 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1642 vp9_variance_fn_ptr_t *v_fn_ptr;
1643 ENTROPY_CONTEXT t_above[2], t_left[2];
1644 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1646 int subpelmv = 1, have_ref = 0;
1647 const int has_second_rf = has_second_ref(mbmi);
// Work on local copies of the luma above/left entropy contexts.
1649 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1650 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1652 v_fn_ptr = &cpi->fn_ptr[bsize];
// NOTE(review): the comment below talks about a factor of 64, but the
// multiplier actually used in the expression is 1.
1654 // 64 makes this threshold really big effectively
1655 // making it so that we very rarely check mvs on
1656 // segments. setting this to 1 would make mv thresh
1657 // roughly equal to what it is for macroblocks
1658 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1660 // Segmentation method overheads
1661 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1662 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1663 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1664 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1665 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1666 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1667 MB_PREDICTION_MODE mode_selected = ZEROMV;
1668 int64_t best_rd = INT64_MAX;
// Candidate vectors for this label: zero, plus nearest/near obtained from
// vp9_append_sub8x8_mvs_for_idx() for each reference frame in use.
1671 frame_mv[ZEROMV][mbmi->ref_frame[0]].as_int = 0;
1672 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
1673 i, 0, mi_row, mi_col,
1674 &frame_mv[NEARESTMV][mbmi->ref_frame[0]],
1675 &frame_mv[NEARMV][mbmi->ref_frame[0]]);
1676 if (has_second_rf) {
1677 frame_mv[ZEROMV][mbmi->ref_frame[1]].as_int = 0;
1678 vp9_append_sub8x8_mvs_for_idx(&cpi->common, &x->e_mbd, tile,
1679 i, 1, mi_row, mi_col,
1680 &frame_mv[NEARESTMV][mbmi->ref_frame[1]],
1681 &frame_mv[NEARMV][mbmi->ref_frame[1]]);
1683 // search for the best motion vector on this segment
1684 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1685 const struct buf_2d orig_src = x->plane[0].src;
1686 struct buf_2d orig_pre[2];
1688 mode_idx = INTER_OFFSET(this_mode);
1689 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1691 // if we're near/nearest and mv == 0,0, compare to zeromv
1692 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1693 this_mode == ZEROMV) &&
1694 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1696 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
// Signalling costs of the three modes that can all describe zero motion.
1697 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
1698 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1699 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1700 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1702 if (this_mode == NEARMV) {
1705 } else if (this_mode == NEARESTMV) {
1709 assert(this_mode == ZEROMV);
1710 if (!has_second_rf) {
1712 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
1714 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
1718 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
1719 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
1721 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
1722 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
// Save the prediction buffers and seed the per-mode entropy contexts from
// the contexts valid at the start of this label.
1728 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1729 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1730 sizeof(bsi->rdstat[i][mode_idx].ta));
1731 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1732 sizeof(bsi->rdstat[i][mode_idx].tl));
1734 // motion search for newmv (single predictor case only)
1735 if (!has_second_rf && this_mode == NEWMV &&
1736 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1739 int thissme, bestsme = INT_MAX;
1740 int sadpb = x->sadperbit4;
1744 /* Is the best so far sufficiently good that we cannot justify doing
1745 * a new motion search? */
1746 if (best_rd < label_mv_thresh)
1749 if (cpi->compressor_speed) {
1750 // use previous block's result as next block's MV predictor.
1752 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1754 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1758 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1760 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1762 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
1763 // Take wtd average of the step_params based on the last frame's
1764 // max mv magnitude and the best ref mvs of the current block for
1765 // the given reference.
1766 step_param = (vp9_init_search_range(cpi, max_mv) +
1767 cpi->mv_step_param) >> 1;
1769 step_param = cpi->mv_step_param;
// Starting point of the search: the predictor shifted right by 3.
1772 mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3;
1773 mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3;
// With adaptive motion search the start point is taken from x->pred_mv
// instead and step_param is raised to at least 8.
1775 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame) {
1776 mvp_full.as_mv.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1777 mvp_full.as_mv.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1778 step_param = MAX(step_param, 8);
1781 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1782 // adjust src pointer for this block
1785 vp9_set_mv_search_range(x, &bsi->ref_mv->as_mv);
// Dispatch on the configured search method; the last branch falls back
// to the diamond search.
1787 if (cpi->sf.search_method == HEX) {
1788 bestsme = vp9_hex_search(x, &mvp_full.as_mv,
1790 sadpb, 1, v_fn_ptr, 1,
1791 &bsi->ref_mv->as_mv,
1792 &mode_mv[NEWMV].as_mv);
1793 } else if (cpi->sf.search_method == SQUARE) {
1794 bestsme = vp9_square_search(x, &mvp_full.as_mv,
1796 sadpb, 1, v_fn_ptr, 1,
1797 &bsi->ref_mv->as_mv,
1798 &mode_mv[NEWMV].as_mv);
1799 } else if (cpi->sf.search_method == BIGDIA) {
1800 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
1802 sadpb, 1, v_fn_ptr, 1,
1803 &bsi->ref_mv->as_mv,
1804 &mode_mv[NEWMV].as_mv);
1806 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1807 sadpb, further_steps, 0, v_fn_ptr,
1808 bsi->ref_mv, &mode_mv[NEWMV]);
1811 // Should we do a full search (best quality only)
1812 if (cpi->compressor_speed == 0) {
1813 /* Check if mvp_full is within the range. */
1814 clamp_mv(&mvp_full.as_mv, x->mv_col_min, x->mv_col_max,
1815 x->mv_row_min, x->mv_row_max);
1817 thissme = cpi->full_search_sad(x, &mvp_full.as_mv,
1818 sadpb, 16, v_fn_ptr,
1819 x->nmvjointcost, x->mvcost,
1820 &bsi->ref_mv->as_mv, i);
1822 if (thissme < bestsme) {
1824 mode_mv[NEWMV].as_int = mi->bmi[i].as_mv[0].as_int;
1826 /* The full search result is actually worse so re-instate the
1827 * previous best vector */
1828 mi->bmi[i].as_mv[0].as_int = mode_mv[NEWMV].as_int;
// Refine the vector found above with the configured fractional search.
1832 if (bestsme < INT_MAX) {
1834 cpi->find_fractional_mv_step(x,
1835 &mode_mv[NEWMV].as_mv,
1836 &bsi->ref_mv->as_mv,
1837 cpi->common.allow_high_precision_mv,
1838 x->errorperbit, v_fn_ptr,
1839 0, cpi->sf.subpel_iters_per_step,
1840 x->nmvjointcost, x->mvcost,
1841 &distortion, &x->pred_sse);
1843 // save motion search result for use in compound prediction
1844 seg_mvs[i][mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1847 if (cpi->sf.adaptive_motion_search)
1848 x->pred_mv[mbmi->ref_frame[0]].as_int = mode_mv[NEWMV].as_int;
1850 // restore src pointers
1851 mi_buf_restore(x, orig_src, orig_pre);
// Compound prediction tests that both single reference search results
// exist.
1854 if (has_second_rf) {
1855 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1856 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1860 if (has_second_rf && this_mode == NEWMV &&
1861 mbmi->interp_filter == EIGHTTAP) {
1862 // adjust src pointers
1864 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1866 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1867 mi_row, mi_col, seg_mvs[i],
1869 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1870 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1871 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1872 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1874 // restore src pointers
1875 mi_buf_restore(x, orig_src, orig_pre);
// Apply the mode to the label; brate starts as the value returned by
// labels2mode().
1878 bsi->rdstat[i][mode_idx].brate =
1879 labels2mode(x, i, this_mode, &mode_mv[this_mode],
1880 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1881 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
// Record the vectors for this label and for the neighbouring 4x4 units
// it covers (i + 1 to the right, i + 2 below).
1885 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
1886 if (num_4x4_blocks_wide > 1)
1887 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
1888 mode_mv[this_mode].as_int;
1889 if (num_4x4_blocks_high > 1)
1890 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
1891 mode_mv[this_mode].as_int;
1892 if (has_second_rf) {
1893 bsi->rdstat[i][mode_idx].mvs[1].as_int =
1894 second_mode_mv[this_mode].as_int;
1895 if (num_4x4_blocks_wide > 1)
1896 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
1897 second_mode_mv[this_mode].as_int;
1898 if (num_4x4_blocks_high > 1)
1899 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
1900 second_mode_mv[this_mode].as_int;
1903 // Trap vectors that reach beyond the UMV borders
1904 if (mv_check_bounds(x, &mode_mv[this_mode]))
1906 if (has_second_rf &&
1907 mv_check_bounds(x, &second_mode_mv[this_mode]))
// For later filter passes, try to reuse the statistics of an earlier pass
// (bsi_buf[0], then bsi_buf[1]). subpelmv is set when the low four bits of
// any vector component are nonzero; have_ref is set when the vectors equal
// those stored by the reference pass.
1910 if (filter_idx > 0) {
1911 BEST_SEG_INFO *ref_bsi = bsi_buf;
1912 subpelmv = (mode_mv[this_mode].as_mv.row & 0x0f) ||
1913 (mode_mv[this_mode].as_mv.col & 0x0f);
1914 have_ref = mode_mv[this_mode].as_int ==
1915 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1916 if (has_second_rf) {
1917 subpelmv |= (second_mode_mv[this_mode].as_mv.row & 0x0f) ||
1918 (second_mode_mv[this_mode].as_mv.col & 0x0f);
1919 have_ref &= second_mode_mv[this_mode].as_int ==
1920 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1923 if (filter_idx > 1 && !subpelmv && !have_ref) {
1924 ref_bsi = bsi_buf + 1;
1925 have_ref = mode_mv[this_mode].as_int ==
1926 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1927 if (has_second_rf) {
1928 have_ref &= second_mode_mv[this_mode].as_int ==
1929 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
// Reuse: copy the whole statistics record plus the eobs of the covered
// neighbouring units instead of encoding again.
1933 if (!subpelmv && have_ref &&
1934 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1935 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1936 sizeof(SEG_RDSTAT));
1937 if (num_4x4_blocks_wide > 1)
1938 bsi->rdstat[i + 1][mode_idx].eobs =
1939 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1940 if (num_4x4_blocks_high > 1)
1941 bsi->rdstat[i + 2][mode_idx].eobs =
1942 ref_bsi->rdstat[i + 2][mode_idx].eobs;
1944 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1945 mode_selected = this_mode;
1946 best_rd = bsi->rdstat[i][mode_idx].brdcost;
// No reusable result: encode the label with the rd budget that is left.
1952 bsi->rdstat[i][mode_idx].brdcost =
1953 encode_inter_mb_segment(cpi, x,
1954 bsi->segment_rd - this_segment_rd, i,
1955 &bsi->rdstat[i][mode_idx].byrate,
1956 &bsi->rdstat[i][mode_idx].bdist,
1957 &bsi->rdstat[i][mode_idx].bsse,
1958 bsi->rdstat[i][mode_idx].ta,
1959 bsi->rdstat[i][mode_idx].tl,
// Add the rd cost of the mode/mv rate, fold the coefficient rate into
// brate, and record the eobs.
1961 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1962 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1963 bsi->rdstat[i][mode_idx].brate, 0);
1964 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1965 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
1966 if (num_4x4_blocks_wide > 1)
1967 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
1968 if (num_4x4_blocks_high > 1)
1969 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
1972 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1973 mode_selected = this_mode;
1974 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1976 } /*for each 4x4 mode*/
// No mode produced a finite cost for this label: invalidate the remaining
// labels and the whole segmentation.
1978 if (best_rd == INT64_MAX) {
1980 for (iy = i + 1; iy < 4; ++iy)
1981 for (midx = 0; midx < INTER_MODES; ++midx)
1982 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1983 bsi->segment_rd = INT64_MAX;
// Commit the winner: adopt its entropy contexts and re-apply its mode and
// vectors to the block.
1987 mode_idx = INTER_OFFSET(mode_selected);
1988 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
1989 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
1991 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
1992 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
1993 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
1996 br += bsi->rdstat[i][mode_idx].brate;
1997 bd += bsi->rdstat[i][mode_idx].bdist;
1998 block_sse += bsi->rdstat[i][mode_idx].bsse;
1999 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2000 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
// Running total exceeds the bound: invalidate the remaining labels and
// the whole segmentation.
2002 if (this_segment_rd > bsi->segment_rd) {
2004 for (iy = i + 1; iy < 4; ++iy)
2005 for (midx = 0; midx < INTER_MODES; ++midx)
2006 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2007 bsi->segment_rd = INT64_MAX;
2011 } /* for each label */
2015 bsi->segment_yrate = segmentyrate;
2016 bsi->segment_rd = this_segment_rd;
2017 bsi->sse = block_sse;
2019 // update the coding decisions
2020 for (i = 0; i < 4; ++i)
2021 bsi->modes[i] = mi->bmi[i].as_mode;
// Driver for the sub-8x8 inter search. Initialises the BEST_SEG_INFO entry
// selected by filter_idx (rd bound, reference vectors, mv threshold, modes
// preset to ZEROMV), runs rd_check_segment_txsize(), and then copies the
// chosen per-unit vectors, eobs and modes back into the block. Reports
// rate, distortion, luma rate and skippability through the output
// pointers and returns bsi->segment_rd.
2024 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2025 const TileInfo *const tile,
2026 int_mv *best_ref_mv,
2027 int_mv *second_best_ref_mv,
2031 int64_t *returndistortion,
2032 int *skippable, int64_t *psse,
2034 int_mv seg_mvs[4][MAX_REF_FRAMES],
2035 BEST_SEG_INFO *bsi_buf,
2037 int mi_row, int mi_col) {
2039 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2040 MACROBLOCKD *xd = &x->e_mbd;
2041 MODE_INFO *mi = xd->mi_8x8[0];
2042 MB_MODE_INFO *mbmi = &mi->mbmi;
// segment_rd doubles as the rd bound for the search.
2047 bsi->segment_rd = best_rd;
2048 bsi->ref_mv = best_ref_mv;
2049 bsi->second_ref_mv = second_best_ref_mv;
2050 bsi->mvp.as_int = best_ref_mv->as_int;
2051 bsi->mvthresh = mvthresh;
2053 for (i = 0; i < 4; i++)
2054 bsi->modes[i] = ZEROMV;
2056 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
// The search result is compared against the bound passed in by the caller.
2059 if (bsi->segment_rd > best_rd)
2061 /* set it to the best */
2062 for (i = 0; i < 4; i++) {
2063 mode_idx = INTER_OFFSET(bsi->modes[i]);
2064 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2065 if (has_second_ref(mbmi))
2066 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2067 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2068 mi->bmi[i].as_mode = bsi->modes[i];
2072 * used to set mbmi->mv.as_int
2074 *returntotrate = bsi->r;
2075 *returndistortion = bsi->d;
2076 *returnyrate = bsi->segment_yrate;
2077 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
// The block level mode is the mode of 4x4 unit 3.
2079 mbmi->mode = bsi->modes[3];
2081 return bsi->segment_rd;
// Ranks the candidate reference motion vectors for ref_frame by SAD. For
// every candidate the SAD between the luma source block and the reference
// buffer displaced by the candidate (shifted right by 3) is computed. The
// index of the best candidate is stored in x->mv_best_ref_index[ref_frame]
// and the largest component magnitude seen (shifted right by 3) in
// x->max_mv_context[ref_frame].
2084 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2085 uint8_t *ref_y_buffer, int ref_y_stride,
2086 int ref_frame, BLOCK_SIZE block_size ) {
2087 MACROBLOCKD *xd = &x->e_mbd;
2088 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2093 int best_sad = INT_MAX;
2094 int this_sad = INT_MAX;
2097 uint8_t *src_y_ptr = x->plane[0].src.buf;
2099 int row_offset, col_offset;
// One extra candidate (x->pred_mv) is considered when adaptive motion
// search is on, the frame is shown, and the block is smaller than the
// maximum partition size.
2100 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2101 (cpi->sf.adaptive_motion_search &&
2102 cpi->common.show_frame &&
2103 block_size < cpi->sf.max_partition_size);
2105 // Get the sad for each candidate reference mv
2106 for (i = 0; i < num_mv_refs; i++) {
2107 this_mv.as_int = (i < MAX_MV_REF_CANDIDATES) ?
2108 mbmi->ref_mvs[ref_frame][i].as_int : x->pred_mv[ref_frame].as_int;
2110 max_mv = MAX(max_mv,
2111 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2112 // The list is at an end if we see 0 for a second time.
2113 if (!this_mv.as_int && zero_seen)
2115 zero_seen = zero_seen || !this_mv.as_int;
// Displace the reference pointer by the candidate, shifted right by 3.
2117 row_offset = this_mv.as_mv.row >> 3;
2118 col_offset = this_mv.as_mv.col >> 3;
2119 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2121 // Find sad for current vector.
2122 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2123 ref_y_ptr, ref_y_stride,
2126 // Note if it is the best so far.
2127 if (this_sad < best_sad) {
2128 best_sad = this_sad;
2133 // Note the index of the mv that worked best in the reference list.
2134 x->mv_best_ref_index[ref_frame] = best_index;
2135 x->max_mv_context[ref_frame] = max_mv;
// Estimates the cost of signalling each reference-frame choice for the
// current block.
//   ref_costs_single[] - per-frame cost for single-reference prediction; the
//                        INTRA_FRAME entry holds the cost of the intra/inter
//                        bit coded as 0.
//   ref_costs_comp[]   - per-frame cost for compound prediction (only the
//                        LAST_FRAME and GOLDEN_FRAME entries are written in
//                        the non-segment path).
//   comp_mode_p        - receives the reference-mode probability when
//                        cm->reference_mode == REFERENCE_MODE_SELECT.
// All costs are built from vp9_cost_bit() applied to the context
// probabilities returned by the vp9_get_*_prob helpers.
2138 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2139 unsigned int *ref_costs_single,
2140 unsigned int *ref_costs_comp,
2141 vp9_prob *comp_mode_p) {
2142 VP9_COMMON *const cm = &cpi->common;
2143 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2144 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
// When the segment feature is active both cost tables are cleared to zero
// for all MAX_REF_FRAMES entries.
2146 if (seg_ref_active) {
2147 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2148 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2151 vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
// 128 is the starting value of comp_inter_p; it is replaced by the context
// probability only when the reference mode is selectable per block.
2152 vp9_prob comp_inter_p = 128;
2154 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2155 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2156 *comp_mode_p = comp_inter_p;
2161 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
// Single-reference costs: start from the shared base cost (inter bit, plus
// the "not compound" bit when the mode is selectable), then add the bits
// that distinguish the frames:
//   LAST   : p1 bit 0
//   GOLDEN : p1 bit 1, p2 bit 0
//   ALTREF : p1 bit 1, p2 bit 1
2163 if (cm->reference_mode != COMPOUND_REFERENCE) {
2164 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2165 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2166 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2168 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2169 base_cost += vp9_cost_bit(comp_inter_p, 0);
2171 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2172 ref_costs_single[ALTREF_FRAME] = base_cost;
2173 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2174 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2175 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2176 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2177 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
// Fixed placeholder cost of 512 for each single-reference frame.
2179 ref_costs_single[LAST_FRAME] = 512;
2180 ref_costs_single[GOLDEN_FRAME] = 512;
2181 ref_costs_single[ALTREF_FRAME] = 512;
// Compound costs: base cost (inter bit, plus the "compound" bit when the
// mode is selectable) plus one bit coded with ref_comp_p.
2183 if (cm->reference_mode != SINGLE_REFERENCE) {
2184 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2185 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2187 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2188 base_cost += vp9_cost_bit(comp_inter_p, 1);
2190 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2191 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
// Fixed placeholder cost of 512 for each compound entry.
2193 ref_costs_comp[LAST_FRAME] = 512;
2194 ref_costs_comp[GOLDEN_FRAME] = 512;
// Records the state of the chosen mode in ctx so it can be reinstated later:
// the skip flag, the mode index, a copy of the current mode info, both
// reference mvs, and the rd-difference tables for prediction mode, transform
// size and interpolation filter.
2199 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2202 int_mv *second_ref_mv,
2203 int64_t comp_pred_diff[REFERENCE_MODES],
2204 int64_t tx_size_diff[TX_MODES],
2205 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2206 MACROBLOCKD *const xd = &x->e_mbd;
2208 // Take a snapshot of the coding context so it can be
2209 // restored if we decide to encode this way
2210 ctx->skip = x->skip;
2211 ctx->best_mode_index = mode_index;
// Struct copy of the mode info that xd->mi_8x8[0] points at.
2212 ctx->mic = *xd->mi_8x8[0];
2214 ctx->best_ref_mv.as_int = ref_mv->as_int;
2215 ctx->second_best_ref_mv.as_int = second_ref_mv->as_int;
// NOTE(review): the 64-bit differences are narrowed with an (int) cast here;
// confirm callers never pass values outside the int range.
2217 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2218 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2219 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
// The first copy is sized by the destination array, the second by the
// element size times SWITCHABLE_FILTER_CONTEXTS.
2221 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2222 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2223 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
// Fills the plane descriptors in dst[] from the frame buffer src and then
// positions each one at block (mi_row, mi_col) via setup_pred_plane().
// Plane 0 uses the scale factors in scale; every other plane uses scale_uv.
// Per-plane subsampling is taken from xd->plane[i].
2226 static void setup_pred_block(const MACROBLOCKD *xd,
2227 struct buf_2d dst[MAX_MB_PLANE],
2228 const YV12_BUFFER_CONFIG *src,
2229 int mi_row, int mi_col,
2230 const struct scale_factors *scale,
2231 const struct scale_factors *scale_uv) {
// Luma gets its own stride; both chroma planes share src->uv_stride.
2234 dst[0].buf = src->y_buffer;
2235 dst[0].stride = src->y_stride;
2236 dst[1].buf = src->u_buffer;
2237 dst[2].buf = src->v_buffer;
2238 dst[1].stride = dst[2].stride = src->uv_stride;
// NOTE(review): index 3 is a fourth (alpha) plane; confirm these two
// assignments are only compiled when MAX_MB_PLANE is greater than 3.
2240 dst[3].buf = src->alpha_buffer;
2241 dst[3].stride = src->alpha_stride;
2244 // TODO(jkoleszar): Make scale factors per-plane data
2245 for (i = 0; i < MAX_MB_PLANE; i++) {
2246 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2247 i ? scale_uv : scale,
2248 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
// Prepares everything the mode search needs for one inter reference frame:
//   1. points yv12_mb[frame_type] at the reference buffer selected through
//      cm->ref_frame_map[idx], positioned at (mi_row, mi_col);
//   2. collects candidate mvs into mbmi->ref_mvs[frame_type];
//   3. derives frame_nearest_mv[frame_type] and frame_near_mv[frame_type];
//   4. for unscaled references and blocks of at least BLOCK_8X8, runs
//      mv_pred() to pick the best starting candidate.
2252 static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2253 const TileInfo *const tile,
2254 int idx, MV_REFERENCE_FRAME frame_type,
2255 BLOCK_SIZE block_size,
2256 int mi_row, int mi_col,
2257 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2258 int_mv frame_near_mv[MAX_REF_FRAMES],
2259 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2260 VP9_COMMON *cm = &cpi->common;
2261 YV12_BUFFER_CONFIG *yv12 = &cm->yv12_fb[cpi->common.ref_frame_map[idx]];
2262 MACROBLOCKD *const xd = &x->e_mbd;
2263 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
// The scale-factor table is indexed by frame_type - 1.
2264 const struct scale_factors *const sf =
2265 &cpi->common.active_ref_scale[frame_type - 1];
2268 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2269 // use the UV scaling factors.
2270 setup_pred_block(xd, yv12_mb[frame_type], yv12, mi_row, mi_col, sf, sf);
2272 // Gets an initial list of candidate vectors from neighbours and orders them
2273 vp9_find_mv_refs(cm, xd, tile, xd->mi_8x8[0],
2276 mbmi->ref_mvs[frame_type], mi_row, mi_col);
2278 // Candidate refinement carried out at encoder and decoder
2279 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
2280 mbmi->ref_mvs[frame_type],
2281 &frame_nearest_mv[frame_type],
2282 &frame_near_mv[frame_type]);
2284 // Further refinement that is encode side only to test the top few candidates
2285 // in full and choose the best as the centre point for subsequent searches.
2286 // The current implementation doesn't support scaling.
2287 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2288 mv_pred(cpi, x, yv12_mb[frame_type][0].buf, yv12->y_stride,
2289 frame_type, block_size);
2292 static YV12_BUFFER_CONFIG *get_scaled_ref_frame(VP9_COMP *cpi, int ref_frame) {
2293 YV12_BUFFER_CONFIG *scaled_ref_frame = NULL;
2294 int fb = get_ref_frame_idx(cpi, ref_frame);
2295 int fb_scale = get_scale_ref_frame_idx(cpi, ref_frame);
2296 if (cpi->scaled_ref_idx[fb_scale] != cpi->common.ref_frame_map[fb])
2297 scaled_ref_frame = &cpi->common.yv12_fb[cpi->scaled_ref_idx[fb_scale]];
2298 return scaled_ref_frame;
2301 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2302 const MACROBLOCKD *const xd = &x->e_mbd;
2303 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2304 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2305 return SWITCHABLE_INTERP_RATE_FACTOR *
2306 x->switchable_interp_costs[ctx][mbmi->interp_filter];
// Motion search for a block predicted from a single reference frame
// (mbmi->ref_frame[0]).  A full-pixel search chosen by cpi->sf.search_method
// is followed, when it produced a result, by cpi->find_fractional_mv_step.
// The resulting vector is written to *tmp_mv and its signalling cost, measured
// against ref_mv, to *rate_mv.
// Side effects visible here: x->mv_{col,row}_{min,max} are saved and restored
// around the search, x->pred_mv[ref] may be updated, and when a scaled
// reference exists xd->plane[i].pre[0] is swapped out for the duration of the
// search and restored at the end.
2309 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2310 const TileInfo *const tile,
2312 int mi_row, int mi_col,
2313 int_mv *tmp_mv, int *rate_mv) {
2314 MACROBLOCKD *xd = &x->e_mbd;
2315 VP9_COMMON *cm = &cpi->common;
2316 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2317 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2318 int bestsme = INT_MAX;
2319 int further_steps, step_param;
2320 int sadpb = x->sadperbit16;
2322 int ref = mbmi->ref_frame[0];
2323 int_mv ref_mv = mbmi->ref_mvs[ref][0];
// Saved so the limits can be put back once the search is finished.
2325 int tmp_col_min = x->mv_col_min;
2326 int tmp_col_max = x->mv_col_max;
2327 int tmp_row_min = x->mv_row_min;
2328 int tmp_row_max = x->mv_row_max;
2330 YV12_BUFFER_CONFIG *scaled_ref_frame = get_scaled_ref_frame(cpi, ref);
2332 if (scaled_ref_frame) {
2334 // Swap out the reference frame for a version that's been scaled to
2335 // match the resolution of the current frame, allowing the existing
2336 // motion search code to be used without additional modifications.
2337 for (i = 0; i < MAX_MB_PLANE; i++)
2338 backup_yv12[i] = xd->plane[i].pre[0];
2340 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2343 vp9_set_mv_search_range(x, &ref_mv.as_mv);
2345 // Adjust search parameters based on small partitions' result.
2347 // && abs(mvp_full.as_mv.row - x->pred_mv.as_mv.row) < 24 &&
2348 // abs(mvp_full.as_mv.col - x->pred_mv.as_mv.col) < 24) {
2349 // adjust search range
2354 // Get prediction MV.
2355 mvp_full.as_int = x->pred_mv[ref].as_int;
2357 // Adjust MV sign if needed.
2358 if (cm->ref_frame_sign_bias[ref]) {
2359 mvp_full.as_mv.col *= -1;
2360 mvp_full.as_mv.row *= -1;
2363 // Work out the size of the first step in the mv step search.
2364 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2365 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2366 // Take wtd average of the step_params based on the last frame's
2367 // max mv magnitude and that based on the best ref mvs of the current
2368 // block for the given reference.
2369 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2370 cpi->mv_step_param) >> 1;
2372 step_param = cpi->mv_step_param;
// For blocks below 64x64 with adaptive search on a shown frame, step_param is
// raised to at least boffset, which grows by 2 for every halving of the
// block's smaller dimension relative to 64.
2376 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2377 cpi->common.show_frame) {
2378 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2379 b_width_log2(bsize)));
2380 step_param = MAX(step_param, boffset);
// Start from the candidate recorded in x->mv_best_ref_index[ref]; an index at
// or beyond MAX_MV_REF_CANDIDATES selects x->pred_mv[ref] instead.
2383 mvp_full.as_int = x->mv_best_ref_index[ref] < MAX_MV_REF_CANDIDATES ?
2384 mbmi->ref_mvs[ref][x->mv_best_ref_index[ref]].as_int :
2385 x->pred_mv[ref].as_int;
// Scale the start vector down by >> 3 before the full-pixel search.
2387 mvp_full.as_mv.col >>= 3;
2388 mvp_full.as_mv.row >>= 3;
2390 // Further step/diamond searches as necessary
2391 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
// Dispatch on the configured full-pixel search pattern.
2393 if (cpi->sf.search_method == HEX) {
2394 bestsme = vp9_hex_search(x, &mvp_full.as_mv,
2397 &cpi->fn_ptr[bsize], 1,
2398 &ref_mv.as_mv, &tmp_mv->as_mv);
2399 } else if (cpi->sf.search_method == SQUARE) {
2400 bestsme = vp9_square_search(x, &mvp_full.as_mv,
2403 &cpi->fn_ptr[bsize], 1,
2404 &ref_mv.as_mv, &tmp_mv->as_mv);
2405 } else if (cpi->sf.search_method == BIGDIA) {
2406 bestsme = vp9_bigdia_search(x, &mvp_full.as_mv,
2409 &cpi->fn_ptr[bsize], 1,
2410 &ref_mv.as_mv, &tmp_mv->as_mv);
2412 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2413 sadpb, further_steps, 1,
2414 &cpi->fn_ptr[bsize],
// Put back the mv limits saved at the top of the function.
2418 x->mv_col_min = tmp_col_min;
2419 x->mv_col_max = tmp_col_max;
2420 x->mv_row_min = tmp_row_min;
2421 x->mv_row_max = tmp_row_max;
// Sub-pixel refinement only runs when the full-pixel stage produced a result
// (bestsme left its INT_MAX initial value).
2423 if (bestsme < INT_MAX) {
2424 int dis; /* TODO: use dis in distortion calculation later. */
2425 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
2426 cm->allow_high_precision_mv,
2428 &cpi->fn_ptr[bsize],
2429 0, cpi->sf.subpel_iters_per_step,
2430 x->nmvjointcost, x->mvcost,
2431 &dis, &x->pred_sse);
2433 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
2434 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
// Remember the result as the prediction for later searches on this reference.
2436 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
2437 x->pred_mv[ref].as_int = tmp_mv->as_int;
// Undo the temporary switch to the scaled reference planes.
2439 if (scaled_ref_frame) {
2441 for (i = 0; i < MAX_MB_PLANE; i++)
2442 xd->plane[i].pre[0] = backup_yv12[i];
// Iterative motion search for compound (two-reference) prediction.
// Starting from the single-reference results in single_newmv, each iteration
// builds the predictor from one reference into second_pred and refines the
// other reference's vector against it: a small-range full-pixel search
// followed by a compound sub-pixel step.  An improved vector is written to
// frame_mv[refs[id]].  After the loop, the cost of both final vectors relative
// to mbmi->ref_mvs[...][0] is added to *rate_mv.
// Scaled references are swapped in for the duration of the search and
// restored at the end; x->mv_{col,row}_{min,max} are saved and restored inside
// each iteration.
2446 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2449 int mi_row, int mi_col,
2450 int_mv single_newmv[MAX_REF_FRAMES],
// Block dimensions in pixels (4 pixels per 4x4 unit).
2452 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2453 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2454 MACROBLOCKD *xd = &x->e_mbd;
2455 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
// A negative second reference is mapped to index 0 so it can be used as an
// array subscript.
2456 const int refs[2] = { mbmi->ref_frame[0],
2457 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2460 // Prediction buffer from second frame.
// NOTE(review): the allocation result is not checked against NULL in the
// code visible here; it is released with vpx_free() at the end.
2461 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2463 // Do joint motion search in compound mode to get more accurate mv.
2464 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2465 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2466 int last_besterr[2] = {INT_MAX, INT_MAX};
2467 YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2468 get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2469 get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2472 for (ref = 0; ref < 2; ++ref) {
2473 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2475 if (scaled_ref_frame[ref]) {
2477 // Swap out the reference frame for a version that's been scaled to
2478 // match the resolution of the current frame, allowing the existing
2479 // motion search code to be used without additional modifications.
2480 for (i = 0; i < MAX_MB_PLANE; i++)
2481 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2482 setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
// Seed both vectors with the single-reference search results.
2485 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2488 // Allow joint search multiple times iteratively for each ref frame
2489 // and break out the search loop if it couldn't find better mv.
2490 for (ite = 0; ite < 4; ite++) {
2491 struct buf_2d ref_yv12[2];
2492 int bestsme = INT_MAX;
2493 unsigned int bestsse = INT_MAX;
2494 int sadpb = x->sadperbit16;
2496 int search_range = 3;
2498 int tmp_col_min = x->mv_col_min;
2499 int tmp_col_max = x->mv_col_max;
2500 int tmp_row_min = x->mv_row_min;
2501 int tmp_row_max = x->mv_row_max;
2504 // Initialized here because of compiler problem in Visual Studio.
2505 ref_yv12[0] = xd->plane[0].pre[0];
2506 ref_yv12[1] = xd->plane[0].pre[1];
2508 // Get pred block from second frame.
// "!id" selects the reference that is held fixed during this iteration.
2509 vp9_build_inter_predictor(ref_yv12[!id].buf,
2510 ref_yv12[!id].stride,
2512 &frame_mv[refs[!id]].as_mv,
2513 xd->scale_factors[!id],
2515 &xd->subpix, MV_PRECISION_Q3,
2516 mi_col * MI_SIZE, mi_row * MI_SIZE);
2518 // Compound motion search on first ref frame.
// The reference being refined is placed in pre[0] for the search routines;
// pre[0] is reset from scaled_first_yv12 further down.
2520 xd->plane[0].pre[0] = ref_yv12[id];
2521 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2523 // Use mv result from single mode as mvp.
2524 tmp_mv.as_int = frame_mv[refs[id]].as_int;
2526 tmp_mv.as_mv.col >>= 3;
2527 tmp_mv.as_mv.row >>= 3;
2529 // Small-range full-pixel motion search
2530 bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
2532 &cpi->fn_ptr[bsize],
2533 x->nmvjointcost, x->mvcost,
2534 &ref_mv[id].as_mv, second_pred,
// Put back the mv limits saved at the top of this iteration.
2537 x->mv_col_min = tmp_col_min;
2538 x->mv_col_max = tmp_col_max;
2539 x->mv_row_min = tmp_row_min;
2540 x->mv_row_max = tmp_row_max;
2542 if (bestsme < INT_MAX) {
2543 int dis; /* TODO: use dis in distortion calculation later. */
2544 bestsme = cpi->find_fractional_mv_step_comp(
2547 cpi->common.allow_high_precision_mv,
2549 &cpi->fn_ptr[bsize],
2550 0, cpi->sf.subpel_iters_per_step,
2551 x->nmvjointcost, x->mvcost,
2552 &dis, &bestsse, second_pred,
2557 xd->plane[0].pre[0] = scaled_first_yv12;
// Keep the refined vector only when it beats the best error seen so far for
// this reference.
2559 if (bestsme < last_besterr[id]) {
2560 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2561 last_besterr[id] = bestsme;
2562 x->pred_sse = bestsse;
2570 for (ref = 0; ref < 2; ++ref) {
2571 if (scaled_ref_frame[ref]) {
2572 // restore the predictor
2574 for (i = 0; i < MAX_MB_PLANE; i++)
2575 xd->plane[i].pre[ref] = backup_yv12[ref][i];
// Accumulate the signalling cost of each final vector.
2578 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2579 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2580 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2583 vpx_free(second_pred);
2586 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2587 uint8_t *orig_dst[MAX_MB_PLANE],
2588 int orig_dst_stride[MAX_MB_PLANE]) {
2590 for (i = 0; i < MAX_MB_PLANE; i++) {
2591 xd->plane[i].dst.buf = orig_dst[i];
2592 xd->plane[i].dst.stride = orig_dst_stride[i];
// Evaluates one inter prediction mode (mbmi->mode, with mbmi->ref_frame[])
// for the current block and accumulates its rate and distortion.
// Stages visible in this function:
//   1. NEWMV: obtain motion vectors (joint search for compound prediction,
//      single_motion_search otherwise) and their rate.
//   2. Compare zero-valued NEAR/NEAREST/ZERO candidates by mode cost.
//   3. Clamp / bounds-check the vectors and store them in mbmi->mv[].
//   4. Choose the interpolation filter using model_rd_for_sb() estimates,
//      alternating predictions between the real destination buffers and
//      tmp_buf so the best prediction can be kept without re-running it.
//   5. Optional encode-breakout test that can mark the block as skipped.
//   6. Otherwise compute luma and chroma rate/distortion.
// Outputs are returned through the pointer parameters (*rate2, *distortion,
// *rate_y, *distortion_y, *rate_uv, *distortion_uv, *mode_excluded,
// *disable_skip, *best_filter, ...).  The destination buffers are restored
// before returning.  The return value is this_rd, which stays 0 unless the
// encode-breakout path computed it.
2596 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2597 const TileInfo *const tile,
2599 int64_t txfm_cache[],
2600 int *rate2, int64_t *distortion,
2602 int *rate_y, int64_t *distortion_y,
2603 int *rate_uv, int64_t *distortion_uv,
2604 int *mode_excluded, int *disable_skip,
2605 INTERPOLATION_TYPE *best_filter,
2606 int_mv (*mode_mv)[MAX_REF_FRAMES],
2607 int mi_row, int mi_col,
2608 int_mv single_newmv[MAX_REF_FRAMES],
2610 const int64_t ref_best_rd) {
2611 VP9_COMMON *cm = &cpi->common;
2612 MACROBLOCKD *xd = &x->e_mbd;
2613 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2614 const int is_comp_pred = has_second_ref(mbmi);
2615 const int num_refs = is_comp_pred ? 2 : 1;
2616 const int this_mode = mbmi->mode;
// Row of the mode_mv table holding the per-reference vectors for this mode.
2617 int_mv *frame_mv = mode_mv[this_mode];
// A negative second reference is mapped to index 0 so refs[1] is always a
// valid subscript.
2619 int refs[2] = { mbmi->ref_frame[0],
2620 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2622 int64_t this_rd = 0;
// Scratch prediction storage: one 64x64 region per plane.
2623 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2624 int pred_exists = 0;
2626 int64_t rd, best_rd = INT64_MAX;
2627 int best_needs_copy = 0;
2628 uint8_t *orig_dst[MAX_MB_PLANE];
2629 int orig_dst_stride[MAX_MB_PLANE];
// Detect a reference whose vector for this mode is marked INVALID_MV.
2633 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2634 frame_mv[refs[1]].as_int == INVALID_MV)
2638 if (this_mode == NEWMV) {
2641 // Initialize mv using single prediction mode result.
2642 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2643 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
// Joint refinement is only attempted for block sizes at or above the
// configured threshold; otherwise only the vector costs are computed.
2645 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2646 joint_motion_search(cpi, x, bsize, frame_mv,
2647 mi_row, mi_col, single_newmv, &rate_mv);
2649 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2650 &mbmi->ref_mvs[refs[0]][0].as_mv,
2651 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2652 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2653 &mbmi->ref_mvs[refs[1]][0].as_mv,
2654 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2659 single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
// Publish the single-reference result so later compound modes can start
// from it.
2662 frame_mv[refs[0]].as_int =
2663 xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2664 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2668 // if we're near/nearest and mv == 0,0, compare to zeromv
2669 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2670 frame_mv[refs[0]].as_int == 0 &&
2671 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2672 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
// Signalling cost of each of the three candidate modes in this context.
2673 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
2674 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2675 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2676 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2678 if (this_mode == NEARMV) {
2681 } else if (this_mode == NEARESTMV) {
2685 assert(this_mode == ZEROMV);
2686 if (num_refs == 1) {
2688 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2690 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2694 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2695 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2697 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2698 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
2704 for (i = 0; i < num_refs; ++i) {
2705 cur_mv[i] = frame_mv[refs[i]];
2706 // Clip "next_nearest" so that it does not extend too far out of image
2707 if (this_mode != NEWMV)
2708 clamp_mv2(&cur_mv[i].as_mv, xd);
2710 if (mv_check_bounds(x, &cur_mv[i]))
2712 mbmi->mv[i].as_int = cur_mv[i].as_int;
2715 // do first prediction into the destination buffer. Do the next
2716 // prediction into a temporary buffer. Then keep track of which one
2717 // of these currently holds the best predictor, and use the other
2718 // one for future predictions. In the end, copy from tmp_buf to
2719 // dst if necessary.
2720 for (i = 0; i < MAX_MB_PLANE; i++) {
2721 orig_dst[i] = xd->plane[i].dst.buf;
2722 orig_dst_stride[i] = xd->plane[i].dst.stride;
2725 /* We don't include the cost of the second reference here, because there
2726 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2727 * words if you present them in that order, the second one is always known
2728 * if the first is known */
2729 *rate2 += cost_mv_ref(cpi, this_mode,
2730 mbmi->mode_context[mbmi->ref_frame[0]]);
// Unless the caller already excluded the mode, exclude it when its prediction
// type (single vs. compound) conflicts with the frame-level reference mode.
2732 if (!(*mode_excluded))
2733 *mode_excluded = is_comp_pred
2734 ? cpi->common.reference_mode == SINGLE_REFERENCE
2735 : cpi->common.reference_mode == COMPOUND_REFERENCE;
2738 // Are all MVs integer pel for Y and UV
2739 intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 &&
2740 (mbmi->mv[0].as_mv.col & 15) == 0;
2742 intpel_mv &= (mbmi->mv[1].as_mv.row & 15) == 0 &&
2743 (mbmi->mv[1].as_mv.col & 15) == 0;
2746 // Search for best switchable filter by checking the variance of
2747 // pred error irrespective of whether the filter will be used
2748 cpi->mask_filter_rd = 0;
2749 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2750 cpi->rd_filter_cache[i] = INT64_MAX;
2752 if (cm->mcomp_filter_type != BILINEAR) {
2753 *best_filter = EIGHTTAP;
2754 if (x->source_variance <
2755 cpi->sf.disable_filter_search_var_thresh) {
2756 *best_filter = EIGHTTAP;
// Model estimates from the first filter, reused for the remaining filters
// when every vector is integer-pel.
2759 int tmp_rate_sum = 0;
2760 int64_t tmp_dist_sum = 0;
2762 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2765 mbmi->interp_filter = i;
2766 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
// rs is the filter signalling rate; rs_rd is that rate alone as an rd cost.
2767 rs = get_switchable_rate(x);
2768 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
// With integer-pel vectors the estimate cached from filter 0 is reused
// instead of building a new prediction.
2770 if (i > 0 && intpel_mv) {
2771 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2772 cpi->rd_filter_cache[i] = rd;
2773 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2774 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2775 if (cm->mcomp_filter_type == SWITCHABLE)
2777 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2780 int64_t dist_sum = 0;
// Select the buffer this prediction is written to: either the real
// destination buffers or the scratch tmp_buf.
2781 if ((cm->mcomp_filter_type == SWITCHABLE &&
2782 (!i || best_needs_copy)) ||
2783 (cm->mcomp_filter_type != SWITCHABLE &&
2784 (cm->mcomp_filter_type == mbmi->interp_filter ||
2785 (i == 0 && intpel_mv)))) {
2786 restore_dst_buf(xd, orig_dst, orig_dst_stride);
// Scratch layout: plane j starts at offset j * 64 * 64 with a stride of 64.
2788 for (j = 0; j < MAX_MB_PLANE; j++) {
2789 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2790 xd->plane[j].dst.stride = 64;
2793 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2794 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2796 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2797 cpi->rd_filter_cache[i] = rd;
2798 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2799 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2800 if (cm->mcomp_filter_type == SWITCHABLE)
2802 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2804 if (i == 0 && intpel_mv) {
2805 tmp_rate_sum = rate_sum;
2806 tmp_dist_sum = dist_sum;
// Breakout test on the first filter: its modelled rd is compared (halved)
// against the best rd found so far by the caller.
2810 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2811 if (rd / 2 > ref_best_rd) {
2812 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2816 newbest = i == 0 || rd < best_rd;
2820 *best_filter = mbmi->interp_filter;
// Toggling records that the best prediction now lives in the other buffer.
2821 if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv)
2822 best_needs_copy = !best_needs_copy;
2825 if ((cm->mcomp_filter_type == SWITCHABLE && newbest) ||
2826 (cm->mcomp_filter_type != SWITCHABLE &&
2827 cm->mcomp_filter_type == mbmi->interp_filter)) {
2831 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2834 // Set the appropriate filter
2835 mbmi->interp_filter = cm->mcomp_filter_type != SWITCHABLE ?
2836 cm->mcomp_filter_type : *best_filter;
2837 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
// The filter rate only applies when the filter is signalled per block.
2838 rs = cm->mcomp_filter_type == SWITCHABLE ? get_switchable_rate(x) : 0;
2841 if (best_needs_copy) {
2842 // again temporarily set the buffers to local memory to prevent a memcpy
2843 for (i = 0; i < MAX_MB_PLANE; i++) {
2844 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2845 xd->plane[i].dst.stride = 64;
2849 // Handles the special case when a filter that is not in the
2850 // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2851 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2855 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2858 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2859 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2860 // if current pred_error modeled rd is substantially more than the best
2861 // so far, do not bother doing full rd
2862 if (rd / 2 > ref_best_rd) {
2863 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2868 if (cpi->common.mcomp_filter_type == SWITCHABLE)
2869 *rate2 += get_switchable_rate(x);
// Encode breakout: only considered for single-reference prediction.  The
// block is treated as skippable when the prediction error of Y, then U,
// then V is below thresholds derived from the dequantizer values.
2871 if (!is_comp_pred && cpi->enable_encode_breakout) {
2872 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
2874 else if (x->encode_breakout) {
2875 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2876 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2877 unsigned int var, sse;
2878 // Skipping threshold for ac.
2879 unsigned int thresh_ac;
2880 // The encode_breakout input
2881 unsigned int encode_breakout = x->encode_breakout << 4;
2882 unsigned int max_thresh = 36000;
2884 // Use extreme low threshold for static frames to limit skipping.
2885 if (cpi->enable_encode_breakout == 2)
2888 // Calculate threshold according to dequant value.
2889 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2891 // Use encode_breakout input if it is bigger than internal threshold.
2892 if (thresh_ac < encode_breakout)
2893 thresh_ac = encode_breakout;
2895 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2896 if (thresh_ac > max_thresh)
2897 thresh_ac = max_thresh;
2899 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2900 xd->plane[0].dst.buf,
2901 xd->plane[0].dst.stride, &sse);
2903 // Adjust threshold according to partition size.
2904 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2905 b_height_log2_lookup[bsize]);
2907 // Y skipping condition checking
2908 if (sse < thresh_ac || sse == 0) {
2909 // Skipping threshold for dc
2910 unsigned int thresh_dc;
2912 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2914 // dc skipping checking
2915 if ((sse - var) < thresh_dc || sse == var) {
2916 unsigned int sse_u, sse_v;
2917 unsigned int var_u, var_v;
2919 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2920 x->plane[1].src.stride,
2921 xd->plane[1].dst.buf,
2922 xd->plane[1].dst.stride, &sse_u);
2924 // U skipping condition checking
2925 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2926 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2927 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2928 x->plane[2].src.stride,
2929 xd->plane[2].dst.buf,
2930 xd->plane[2].dst.stride, &sse_v);
2932 // V skipping condition checking
2933 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2934 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2937 // The cost of skip bit needs to be added.
2938 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2940 // Scaling factor for SSE from spatial domain to frequency domain
2941 // is 16. Adjust distortion accordingly.
2942 *distortion_uv = (sse_u + sse_v) << 4;
2943 *distortion = (sse << 4) + *distortion_uv;
2946 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
// Full rate-distortion evaluation of the luma and chroma residuals.
2955 int skippable_y, skippable_uv;
2956 int64_t sseuv = INT64_MAX;
2957 int64_t rdcosty = INT64_MAX;
2959 // Y cost and distortion
2960 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2961 bsize, txfm_cache, ref_best_rd);
// *rate_y == INT_MAX is the failure indication tested here.
2963 if (*rate_y == INT_MAX) {
2965 *distortion = INT64_MAX;
2966 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2971 *distortion += *distortion_y;
// The chroma search budget is what remains of ref_best_rd after the cheaper
// of (coding luma) and (zero rate with distortion *psse).
2973 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2974 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2976 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2977 bsize, ref_best_rd - rdcosty);
2978 if (*rate_uv == INT_MAX) {
2980 *distortion = INT64_MAX;
2981 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2987 *distortion += *distortion_uv;
2988 *skippable = skippable_y && skippable_uv;
2991 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2992 return this_rd; // if 0, this will be re-calculated by caller
2995 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2997 struct macroblock_plane *const p = x->plane;
2998 struct macroblockd_plane *const pd = x->e_mbd.plane;
3001 for (i = 0; i < max_plane; ++i) {
3002 p[i].coeff = ctx->coeff_pbuf[i][1];
3003 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3004 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3005 p[i].eobs = ctx->eobs_pbuf[i][1];
3007 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3008 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3009 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3010 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
3012 ctx->coeff_pbuf[i][0] = p[i].coeff;
3013 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
3014 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3015 ctx->eobs_pbuf[i][0] = p[i].eobs;
// Chooses the intra prediction modes for a block and reports the resulting
// rate and distortion.
//   returnrate - total rate; set to INT_MAX when the luma search does not
//                beat best_rd.
//   returndist - luma plus chroma distortion.
//   ctx        - receives tx_rd_diff[] and a copy of the final mode info.
//   best_rd    - rd cost the luma search has to improve on.
// Blocks of at least BLOCK_8X8 use rd_pick_intra_sby_mode(); smaller blocks
// use rd_pick_intra_sub_8x8_y_mode() and run the chroma search at BLOCK_8X8.
2019 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2020 int *returnrate, int64_t *returndist,
2022 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2023 VP9_COMMON *const cm = &cpi->common;
2024 MACROBLOCKD *const xd = &x->e_mbd;
2025 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2026 int y_skip = 0, uv_skip = 0;
2027 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2028 TX_SIZE max_uv_tx_size;
2031 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
2033 if (bsize >= BLOCK_8X8) {
2034 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2035 &dist_y, &y_skip, bsize, tx_cache,
2036 best_rd) >= best_rd) {
2037 *returnrate = INT_MAX;
// The chroma transform size is derived from the luma transform size chosen
// above.
2040 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
2041 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2042 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
2045 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2046 &dist_y, best_rd) >= best_rd) {
2047 *returnrate = INT_MAX;
2050 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
2051 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2052 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
// When both luma and chroma are skippable the token-only rates are removed
// and the cost of signalling skip = 1 is added instead.
2055 if (y_skip && uv_skip) {
2056 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2057 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2058 *returndist = dist_y + dist_uv;
2059 vp9_zero(ctx->tx_rd_diff);
// Otherwise the full rates are kept and the cost of signalling skip = 0 is
// added.
2062 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
2063 *returndist = dist_y + dist_uv;
// With full-rd transform size search, record each transform mode's rd
// relative to the frame's current tx_mode; entries where either value is
// INT64_MAX fall through to the zero assignment below.
2064 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
2065 for (i = 0; i < TX_MODES; i++) {
2066 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
2067 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
2069 ctx->tx_rd_diff[i] = 0;
// Struct copy of the chosen mode info into the context.
2073 ctx->mic = *xd->mi_8x8[0];
3076 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3077 const TileInfo *const tile,
3078 int mi_row, int mi_col,
3080 int64_t *returndistortion,
3082 PICK_MODE_CONTEXT *ctx,
3083 int64_t best_rd_so_far) {
3084 VP9_COMMON *cm = &cpi->common;
3085 MACROBLOCKD *xd = &x->e_mbd;
3086 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3087 const struct segmentation *seg = &cm->seg;
3088 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3089 MB_PREDICTION_MODE this_mode;
3090 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3091 unsigned char segment_id = mbmi->segment_id;
3093 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3094 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3095 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3096 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3098 int idx_list[4] = {0,
3102 int64_t best_rd = best_rd_so_far;
3103 int64_t best_tx_rd[TX_MODES];
3104 int64_t best_tx_diff[TX_MODES];
3105 int64_t best_pred_diff[REFERENCE_MODES];
3106 int64_t best_pred_rd[REFERENCE_MODES];
3107 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3108 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3109 MB_MODE_INFO best_mbmode = { 0 };
3111 int mode_index, best_mode_index = 0;
3112 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3113 vp9_prob comp_mode_p;
3114 int64_t best_intra_rd = INT64_MAX;
3115 int64_t best_inter_rd = INT64_MAX;
3116 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3117 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3118 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
3119 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3120 int64_t dist_uv[TX_SIZES];
3121 int skip_uv[TX_SIZES];
3122 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3123 unsigned int ref_frame_mask = 0;
3124 unsigned int mode_mask = 0;
3125 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3126 int64_t frame_distortions[MAX_REF_FRAMES] = {-1};
3127 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3128 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3129 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3132 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3134 // Everywhere the flag is set the error is much higher than its neighbors.
3135 ctx->frames_with_high_error = 0;
3136 ctx->modes_with_high_error = 0;
3138 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3141 for (i = 0; i < REFERENCE_MODES; ++i)
3142 best_pred_rd[i] = INT64_MAX;
3143 for (i = 0; i < TX_MODES; i++)
3144 best_tx_rd[i] = INT64_MAX;
3145 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3146 best_filter_rd[i] = INT64_MAX;
3147 for (i = 0; i < TX_SIZES; i++)
3148 rate_uv_intra[i] = INT_MAX;
3150 *returnrate = INT_MAX;
3152 // Create a mask set to 1 for each reference frame used by a smaller
3154 if (cpi->sf.use_avoid_tested_higherror) {
3155 switch (block_size) {
3157 for (i = 0; i < 4; i++) {
3158 for (j = 0; j < 4; j++) {
3159 ref_frame_mask |= x->mb_context[i][j].frames_with_high_error;
3160 mode_mask |= x->mb_context[i][j].modes_with_high_error;
3163 for (i = 0; i < 4; i++) {
3164 ref_frame_mask |= x->sb32_context[i].frames_with_high_error;
3165 mode_mask |= x->sb32_context[i].modes_with_high_error;
3169 for (i = 0; i < 4; i++) {
3171 x->mb_context[x->sb_index][i].frames_with_high_error;
3172 mode_mask |= x->mb_context[x->sb_index][i].modes_with_high_error;
3176 // Until we handle all block sizes set it to present;
3181 ref_frame_mask = ~ref_frame_mask;
3182 mode_mask = ~mode_mask;
3185 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3186 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3187 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
3188 block_size, mi_row, mi_col,
3189 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3191 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3192 frame_mv[ZEROMV][ref_frame].as_int = 0;
3195 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3196 int mode_excluded = 0;
3197 int64_t this_rd = INT64_MAX;
3198 int disable_skip = 0;
3199 int compmode_cost = 0;
3200 int rate2 = 0, rate_y = 0, rate_uv = 0;
3201 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3203 int64_t tx_cache[TX_MODES];
3206 int64_t total_sse = INT_MAX;
3209 for (i = 0; i < TX_MODES; ++i)
3210 tx_cache[i] = INT64_MAX;
3213 this_mode = vp9_mode_order[mode_index].mode;
3214 ref_frame = vp9_mode_order[mode_index].ref_frame;
3215 second_ref_frame = vp9_mode_order[mode_index].second_ref_frame;
3217 // Look at the reference frame of the best mode so far and set the
3218 // skip mask to look at a subset of the remaining modes.
3219 if (mode_index > cpi->sf.mode_skip_start) {
3220 if (mode_index == (cpi->sf.mode_skip_start + 1)) {
3221 switch (vp9_mode_order[best_mode_index].ref_frame) {
3223 cpi->mode_skip_mask = 0;
3226 cpi->mode_skip_mask = LAST_FRAME_MODE_MASK;
3229 cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
3232 cpi->mode_skip_mask = ALT_REF_MODE_MASK;
3235 case MAX_REF_FRAMES:
3236 assert(0 && "Invalid Reference frame");
3239 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3243 // Skip if the current reference frame has been masked off
3244 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3245 (cpi->ref_frame_mask & (1 << ref_frame)))
3248 // Test best rd so far against threshold for trying this mode.
3249 if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
3250 cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
3251 cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
3254 // Do not allow compound prediction if the segment level reference
3255 // frame feature is in use as in this case there can only be one reference.
3256 if ((second_ref_frame > INTRA_FRAME) &&
3257 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3260 // Skip some checking based on small partitions' result.
3261 if (x->fast_ms > 1 && !ref_frame)
3263 if (x->fast_ms > 2 && ref_frame != x->subblock_ref)
3266 if (cpi->sf.use_avoid_tested_higherror && bsize >= BLOCK_8X8) {
3267 if (!(ref_frame_mask & (1 << ref_frame))) {
3270 if (!(mode_mask & (1 << this_mode))) {
3273 if (second_ref_frame != NONE
3274 && !(ref_frame_mask & (1 << second_ref_frame))) {
3279 mbmi->ref_frame[0] = ref_frame;
3280 mbmi->ref_frame[1] = second_ref_frame;
3282 if (!(ref_frame == INTRA_FRAME
3283 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3286 if (!(second_ref_frame == NONE
3287 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3291 comp_pred = second_ref_frame > INTRA_FRAME;
3293 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3294 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME)
3296 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3297 if (ref_frame != best_inter_ref_frame &&
3298 second_ref_frame != best_inter_ref_frame)
3302 set_scale_factors(cm, xd, ref_frame - 1, second_ref_frame - 1);
3303 mbmi->uv_mode = DC_PRED;
3305 // Evaluate all sub-pel filters irrespective of whether we can use
3306 // them for this frame.
3307 mbmi->interp_filter = cm->mcomp_filter_type;
3308 vp9_setup_interp_filters(xd, mbmi->interp_filter, cm);
3311 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3314 mode_excluded = mode_excluded ? mode_excluded
3315 : cm->reference_mode == SINGLE_REFERENCE;
3317 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME)
3318 mode_excluded = mode_excluded ?
3319 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3322 // Select prediction reference frames.
3323 for (i = 0; i < MAX_MB_PLANE; i++) {
3324 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3326 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3329 // If the segment reference frame feature is enabled....
3330 // then do nothing if the current ref frame is not allowed..
3331 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3332 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3335 // If the segment skip feature is enabled....
3336 // then do nothing if the current mode is not allowed..
3337 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3338 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3340 // Disable this drop out case if the ref frame
3341 // segment level feature is enabled for this segment. This is to
3342 // prevent the possibility that we end up unable to pick any mode.
3343 } else if (!vp9_segfeature_active(seg, segment_id,
3344 SEG_LVL_REF_FRAME)) {
3345 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3346 // unless ARNR filtering is enabled in which case we want
3347 // an unfiltered alternative. We allow near/nearest as well
3348 // because they may result in zero-zero MVs but be cheaper.
3349 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3350 if ((this_mode != ZEROMV &&
3351 !(this_mode == NEARMV &&
3352 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3353 !(this_mode == NEARESTMV &&
3354 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3355 ref_frame != ALTREF_FRAME) {
3360 // TODO(JBB): This is to make up for the fact that we don't have sad
3361 // functions that work when the block size reads outside the umv. We
3362 // should fix this either by making the motion search just work on
3363 // a representative block in the boundary ( first ) and then implement a
3364 // function that does sads when inside the border..
3365 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3366 this_mode == NEWMV) {
3370 #ifdef MODE_TEST_HIT_STATS
3372 // Keep a record of the number of test hits at each size
3373 cpi->mode_test_hits[bsize]++;
3377 if (ref_frame == INTRA_FRAME) {
3379 // Disable intra modes other than DC_PRED for blocks with low variance
3380 // Threshold for intra skipping based on source variance
3381 // TODO(debargha): Specialize the threshold for super block sizes
3382 static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
3383 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3385 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3386 this_mode != DC_PRED &&
3387 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3389 // Only search the oblique modes if the best so far is
3390 // one of the neighboring directional modes
3391 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3392 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3393 if (vp9_mode_order[best_mode_index].ref_frame > INTRA_FRAME)
3396 mbmi->mode = this_mode;
3397 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3398 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3402 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3403 bsize, tx_cache, best_rd);
3405 if (rate_y == INT_MAX)
3408 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3409 if (rate_uv_intra[uv_tx] == INT_MAX) {
3410 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3411 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3412 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3415 rate_uv = rate_uv_tokenonly[uv_tx];
3416 distortion_uv = dist_uv[uv_tx];
3417 skippable = skippable && skip_uv[uv_tx];
3418 mbmi->uv_mode = mode_uv[uv_tx];
3420 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3421 if (this_mode != DC_PRED && this_mode != TM_PRED)
3422 rate2 += intra_cost_penalty;
3423 distortion2 = distortion_y + distortion_uv;
3425 mbmi->mode = this_mode;
3426 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3427 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3429 &rate2, &distortion2, &skippable,
3430 &rate_y, &distortion_y,
3431 &rate_uv, &distortion_uv,
3432 &mode_excluded, &disable_skip,
3433 &tmp_best_filter, frame_mv,
3435 single_newmv, &total_sse, best_rd);
3436 if (this_rd == INT64_MAX)
3440 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3441 rate2 += compmode_cost;
3443 // Estimate the reference frame signaling cost and add it
3444 // to the rolling cost variable.
3445 if (second_ref_frame > INTRA_FRAME) {
3446 rate2 += ref_costs_comp[ref_frame];
3448 rate2 += ref_costs_single[ref_frame];
3451 if (!disable_skip) {
3452 // Test for the condition where skip block will be activated
3453 // because there are no non zero coefficients and make any
3454 // necessary adjustment for rate. Ignore if skip is coded at
3455 // segment level as the cost won't have been added in.
3456 // Is Mb level skip allowed (i.e. not coded at segment level).
3457 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3461 // Back out the coefficient coding costs
3462 rate2 -= (rate_y + rate_uv);
3463 // for best yrd calculation
3466 if (mb_skip_allowed) {
3469 // Cost the skip mb case
3470 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3472 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3473 rate2 += prob_skip_cost;
3476 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3477 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3478 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3479 // Add in the cost of the no skip flag.
3480 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3482 // FIXME(rbultje) make this work for splitmv also
3483 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3484 distortion2 = total_sse;
3485 assert(total_sse >= 0);
3486 rate2 -= (rate_y + rate_uv);
3491 } else if (mb_skip_allowed) {
3492 // Add in the cost of the no skip flag.
3493 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3496 // Calculate the final RD estimate for this mode.
3497 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3500 // Keep record of best intra rd
3501 if (!is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3502 this_rd < best_intra_rd) {
3503 best_intra_rd = this_rd;
3504 best_intra_mode = xd->mi_8x8[0]->mbmi.mode;
3507 // Keep record of best inter rd with single reference
3508 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3509 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
3510 !mode_excluded && this_rd < best_inter_rd) {
3511 best_inter_rd = this_rd;
3512 best_inter_ref_frame = ref_frame;
3515 if (!disable_skip && ref_frame == INTRA_FRAME) {
3516 for (i = 0; i < REFERENCE_MODES; ++i)
3517 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3518 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3519 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3522 // Store the respective mode distortions for later use.
3523 if (mode_distortions[this_mode] == -1
3524 || distortion2 < mode_distortions[this_mode]) {
3525 mode_distortions[this_mode] = distortion2;
3527 if (frame_distortions[ref_frame] == -1
3528 || distortion2 < frame_distortions[ref_frame]) {
3529 frame_distortions[ref_frame] = distortion2;
3532 // Did this mode help.. i.e. is it the new best mode
3533 if (this_rd < best_rd || x->skip) {
3534 int max_plane = MAX_MB_PLANE;
3535 if (!mode_excluded) {
3536 // Note index of best mode so far
3537 best_mode_index = mode_index;
3539 if (ref_frame == INTRA_FRAME) {
3540 /* required for left and above block mv */
3541 mbmi->mv[0].as_int = 0;
3545 *returnrate = rate2;
3546 *returndistortion = distortion2;
3548 best_mbmode = *mbmi;
3549 best_skip2 = this_skip2;
3550 if (!x->select_txfm_size)
3551 swap_block_ptr(x, ctx, max_plane);
3552 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3553 sizeof(uint8_t) * ctx->num_4x4_blk);
3555 // TODO(debargha): enhance this test with a better distortion prediction
3556 // based on qp, activity mask and history
3557 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3558 (mode_index > MIN_EARLY_TERM_INDEX)) {
3559 const int qstep = xd->plane[0].dequant[1];
3560 // TODO(debargha): Enhance this by specializing for each mode_index
3562 if (x->source_variance < UINT_MAX) {
3563 const int var_adjust = (x->source_variance < 16);
3564 scale -= var_adjust;
3566 if (ref_frame > INTRA_FRAME &&
3567 distortion2 * scale < qstep * qstep) {
3574 /* keep record of best compound/single-only prediction */
3575 if (!disable_skip && ref_frame != INTRA_FRAME) {
3576 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3578 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3579 single_rate = rate2 - compmode_cost;
3580 hybrid_rate = rate2;
3582 single_rate = rate2;
3583 hybrid_rate = rate2 + compmode_cost;
3586 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3587 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3589 if (second_ref_frame <= INTRA_FRAME &&
3590 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3591 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3592 } else if (second_ref_frame > INTRA_FRAME &&
3593 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3594 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3596 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3597 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3600 /* keep record of best filter type */
3601 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3602 cm->mcomp_filter_type != BILINEAR) {
3603 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
3604 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
3606 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3608 if (ref == INT64_MAX)
3610 else if (cpi->rd_filter_cache[i] == INT64_MAX)
3611 // when early termination is triggered, the encoder does not have
3612 // access to the rate-distortion cost. it only knows that the cost
3613 // should be above the maximum valid value. hence it takes the known
3614 // maximum plus an arbitrary constant as the rate-distortion cost.
3615 adj_rd = cpi->mask_filter_rd - ref + 10;
3617 adj_rd = cpi->rd_filter_cache[i] - ref;
3620 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3624 /* keep record of best txfm size */
3625 if (bsize < BLOCK_32X32) {
3626 if (bsize < BLOCK_16X16)
3627 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3629 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3631 if (!mode_excluded && this_rd != INT64_MAX) {
3632 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3633 int64_t adj_rd = INT64_MAX;
3634 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3636 if (adj_rd < best_tx_rd[i])
3637 best_tx_rd[i] = adj_rd;
3644 if (x->skip && !comp_pred)
3648 if (best_rd >= best_rd_so_far)
3651 // If we used an estimate for the uv intra rd in the loop above...
3652 if (cpi->sf.use_uv_intra_rd_estimate) {
3653 // Do Intra UV best rd mode selection if best mode choice above was intra.
3654 if (vp9_mode_order[best_mode_index].ref_frame == INTRA_FRAME) {
3656 *mbmi = best_mbmode;
3657 uv_tx_size = get_uv_tx_size(mbmi);
3658 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3659 &rate_uv_tokenonly[uv_tx_size],
3660 &dist_uv[uv_tx_size],
3661 &skip_uv[uv_tx_size],
3662 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3667 // If we are using reference masking and the set mask flag is set then
3668 // create the reference frame mask.
3669 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
3670 cpi->ref_frame_mask = ~(1 << vp9_mode_order[best_mode_index].ref_frame);
3672 // Flag all modes that have a distortion that's > 2x the best we found at
3674 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3675 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3678 if (mode_distortions[mode_index] > 2 * *returndistortion) {
3679 ctx->modes_with_high_error |= (1 << mode_index);
3683 // Flag all ref frames that have a distortion that's > 2x the best we found at
3685 for (ref_frame = INTRA_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3686 if (frame_distortions[ref_frame] > 2 * *returndistortion) {
3687 ctx->frames_with_high_error |= (1 << ref_frame);
3691 assert((cm->mcomp_filter_type == SWITCHABLE) ||
3692 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
3693 !is_inter_block(&best_mbmode));
3695 // Updating rd_thresh_freq_fact[] here means that the different
3696 // partition/block sizes are handled independently based on the best
3697 // choice for the current partition. It may well be better to keep a scaled
3698 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3699 // combination that wins out.
3700 if (cpi->sf.adaptive_rd_thresh) {
3701 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3702 if (mode_index == best_mode_index) {
3703 cpi->rd_thresh_freq_fact[bsize][mode_index] -=
3704 (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
3706 cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
3707 if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
3708 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
3709 cpi->rd_thresh_freq_fact[bsize][mode_index] =
3710 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
3717 *mbmi = best_mbmode;
3718 x->skip |= best_skip2;
3720 for (i = 0; i < REFERENCE_MODES; ++i) {
3721 if (best_pred_rd[i] == INT64_MAX)
3722 best_pred_diff[i] = INT_MIN;
3724 best_pred_diff[i] = best_rd - best_pred_rd[i];
3728 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3729 if (best_filter_rd[i] == INT64_MAX)
3730 best_filter_diff[i] = 0;
3732 best_filter_diff[i] = best_rd - best_filter_rd[i];
3734 if (cm->mcomp_filter_type == SWITCHABLE)
3735 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3737 vp9_zero(best_filter_diff);
3741 for (i = 0; i < TX_MODES; i++) {
3742 if (best_tx_rd[i] == INT64_MAX)
3743 best_tx_diff[i] = 0;
3745 best_tx_diff[i] = best_rd - best_tx_rd[i];
3748 vp9_zero(best_tx_diff);
3751 set_scale_factors(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1);
3752 store_coding_context(x, ctx, best_mode_index,
3753 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3754 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3755 mbmi->ref_frame[1]][0],
3756 best_pred_diff, best_tx_diff, best_filter_diff);
3762 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3763 const TileInfo *const tile,
3764 int mi_row, int mi_col,
3766 int64_t *returndistortion,
3768 PICK_MODE_CONTEXT *ctx,
3769 int64_t best_rd_so_far) {
3770 VP9_COMMON *cm = &cpi->common;
3771 MACROBLOCKD *xd = &x->e_mbd;
3772 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3773 const struct segmentation *seg = &cm->seg;
3774 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3775 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3776 unsigned char segment_id = mbmi->segment_id;
3778 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3779 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3780 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3782 int idx_list[4] = {0,
3786 int64_t best_rd = best_rd_so_far;
3787 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3788 int64_t best_tx_rd[TX_MODES];
3789 int64_t best_tx_diff[TX_MODES];
3790 int64_t best_pred_diff[REFERENCE_MODES];
3791 int64_t best_pred_rd[REFERENCE_MODES];
3792 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3793 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3794 MB_MODE_INFO best_mbmode = { 0 };
3795 int mode_index, best_mode_index = 0;
3796 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3797 vp9_prob comp_mode_p;
3798 int64_t best_inter_rd = INT64_MAX;
3799 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3800 INTERPOLATION_TYPE tmp_best_filter = SWITCHABLE;
3801 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3802 int64_t dist_uv[TX_SIZES];
3803 int skip_uv[TX_SIZES];
3804 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3805 int intra_cost_penalty = 20 * vp9_dc_quant(cpi->common.base_qindex,
3806 cpi->common.y_dc_delta_q);
3807 int_mv seg_mvs[4][MAX_REF_FRAMES];
3808 b_mode_info best_bmodes[4];
3811 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3812 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3814 for (i = 0; i < 4; i++) {
3816 for (j = 0; j < MAX_REF_FRAMES; j++)
3817 seg_mvs[i][j].as_int = INVALID_MV;
3820 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3823 for (i = 0; i < REFERENCE_MODES; ++i)
3824 best_pred_rd[i] = INT64_MAX;
3825 for (i = 0; i < TX_MODES; i++)
3826 best_tx_rd[i] = INT64_MAX;
3827 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3828 best_filter_rd[i] = INT64_MAX;
3829 for (i = 0; i < TX_SIZES; i++)
3830 rate_uv_intra[i] = INT_MAX;
3832 *returnrate = INT_MAX;
3834 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3835 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3836 setup_buffer_inter(cpi, x, tile, idx_list[ref_frame], ref_frame,
3837 block_size, mi_row, mi_col,
3838 frame_mv[NEARESTMV], frame_mv[NEARMV],
3841 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3842 frame_mv[ZEROMV][ref_frame].as_int = 0;
3845 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3846 int mode_excluded = 0;
3847 int64_t this_rd = INT64_MAX;
3848 int disable_skip = 0;
3849 int compmode_cost = 0;
3850 int rate2 = 0, rate_y = 0, rate_uv = 0;
3851 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3853 int64_t tx_cache[TX_MODES];
3856 int64_t total_sse = INT_MAX;
3859 for (i = 0; i < TX_MODES; ++i)
3860 tx_cache[i] = INT64_MAX;
3863 ref_frame = vp9_ref_order[mode_index].ref_frame;
3864 second_ref_frame = vp9_ref_order[mode_index].second_ref_frame;
3866 // Look at the reference frame of the best mode so far and set the
3867 // skip mask to look at a subset of the remaining modes.
3868 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3869 if (mode_index == 3) {
3870 switch (vp9_ref_order[best_mode_index].ref_frame) {
3872 cpi->mode_skip_mask = 0;
3875 cpi->mode_skip_mask = 0x0010;
3878 cpi->mode_skip_mask = 0x0008;
3881 cpi->mode_skip_mask = 0x0000;
3884 case MAX_REF_FRAMES:
3885 assert(0 && "Invalid Reference frame");
3888 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3892 // Skip if the current reference frame has been masked off
3893 if (cpi->sf.reference_masking && !cpi->set_ref_frame_mask &&
3894 (cpi->ref_frame_mask & (1 << ref_frame)))
3897 // Test best rd so far against threshold for trying this mode.
3899 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3900 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3901 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3904 // Do not allow compound prediction if the segment level reference
3905 // frame feature is in use as in this case there can only be one reference.
3906 if ((second_ref_frame > INTRA_FRAME) &&
3907 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3910 mbmi->ref_frame[0] = ref_frame;
3911 mbmi->ref_frame[1] = second_ref_frame;
3913 if (!(ref_frame == INTRA_FRAME
3914 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3917 if (!(second_ref_frame == NONE
3918 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3922 comp_pred = second_ref_frame > INTRA_FRAME;
3924 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3925 if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME)
3927 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3928 if (ref_frame != best_inter_ref_frame &&
3929 second_ref_frame != best_inter_ref_frame)
3933 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3935 if (ref_frame > 0 &&
3936 vp9_is_scaled(&cpi->common.active_ref_scale[ref_frame - 1]))
3939 if (second_ref_frame > 0 &&
3940 vp9_is_scaled(&cpi->common.active_ref_scale[second_ref_frame - 1]))
3943 set_scale_factors(cm, xd, ref_frame - 1, second_ref_frame - 1);
3944 mbmi->uv_mode = DC_PRED;
3946 // Evaluate all sub-pel filters irrespective of whether we can use
3947 // them for this frame.
3948 mbmi->interp_filter = cm->mcomp_filter_type;
3949 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
3952 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3955 mode_excluded = mode_excluded ? mode_excluded
3956 : cm->reference_mode == SINGLE_REFERENCE;
3958 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3959 mode_excluded = mode_excluded ?
3960 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3964 // Select prediction reference frames.
3965 for (i = 0; i < MAX_MB_PLANE; i++) {
3966 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3968 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3971 // If the segment reference frame feature is enabled....
3972 // then do nothing if the current ref frame is not allowed..
3973 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3974 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3977 // If the segment skip feature is enabled....
3978 // then do nothing if the current mode is not allowed..
3979 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3980 ref_frame != INTRA_FRAME) {
3982 // Disable this drop out case if the ref frame
3983 // segment level feature is enabled for this segment. This is to
3984 // prevent the possibility that we end up unable to pick any mode.
3985 } else if (!vp9_segfeature_active(seg, segment_id,
3986 SEG_LVL_REF_FRAME)) {
3987 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3988 // unless ARNR filtering is enabled in which case we want
3989 // an unfiltered alternative. We allow near/nearest as well
3990 // because they may result in zero-zero MVs but be cheaper.
3991 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3995 #ifdef MODE_TEST_HIT_STATS
3997 // Keep a record of the number of test hits at each size
3998 cpi->mode_test_hits[bsize]++;
4001 if (ref_frame == INTRA_FRAME) {
4003 mbmi->tx_size = TX_4X4;
4004 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
4005 &distortion_y, best_rd) >= best_rd)
4008 rate2 += intra_cost_penalty;
4009 distortion2 += distortion_y;
4011 if (rate_uv_intra[TX_4X4] == INT_MAX) {
4012 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
4013 &rate_uv_intra[TX_4X4],
4014 &rate_uv_tokenonly[TX_4X4],
4015 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
4018 rate2 += rate_uv_intra[TX_4X4];
4019 rate_uv = rate_uv_tokenonly[TX_4X4];
4020 distortion2 += dist_uv[TX_4X4];
4021 distortion_uv = dist_uv[TX_4X4];
4022 mbmi->uv_mode = mode_uv[TX_4X4];
4023 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4024 for (i = 0; i < TX_MODES; ++i)
4025 tx_cache[i] = tx_cache[ONLY_4X4];
4029 int64_t this_rd_thresh;
4030 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
4031 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
4032 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
4033 int tmp_best_skippable = 0;
4034 int switchable_filter_index;
4035 int_mv *second_ref = comp_pred ?
4036 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
4037 b_mode_info tmp_best_bmodes[16];
4038 MB_MODE_INFO tmp_best_mbmode;
4039 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
4040 int pred_exists = 0;
4043 this_rd_thresh = (ref_frame == LAST_FRAME) ?
4044 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4045 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4046 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4047 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4048 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
4050 cpi->mask_filter_rd = 0;
4051 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4052 cpi->rd_filter_cache[i] = INT64_MAX;
4054 if (cm->mcomp_filter_type != BILINEAR) {
4055 tmp_best_filter = EIGHTTAP;
4056 if (x->source_variance <
4057 cpi->sf.disable_filter_search_var_thresh) {
4058 tmp_best_filter = EIGHTTAP;
4059 } else if (cpi->sf.adaptive_pred_filter_type == 1 &&
4060 ctx->pred_filter_type < SWITCHABLE) {
4061 tmp_best_filter = ctx->pred_filter_type;
4062 } else if (cpi->sf.adaptive_pred_filter_type == 2) {
4063 tmp_best_filter = ctx->pred_filter_type < SWITCHABLE ?
4064 ctx->pred_filter_type : 0;
4066 for (switchable_filter_index = 0;
4067 switchable_filter_index < SWITCHABLE_FILTERS;
4068 ++switchable_filter_index) {
4071 mbmi->interp_filter = switchable_filter_index;
4072 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4074 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4075 &mbmi->ref_mvs[ref_frame][0],
4078 &rate, &rate_y, &distortion,
4079 &skippable, &total_sse,
4080 (int)this_rd_thresh, seg_mvs,
4081 bsi, switchable_filter_index,
4084 if (tmp_rd == INT64_MAX)
4086 rs = get_switchable_rate(x);
4087 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4088 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4089 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4090 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4092 if (cm->mcomp_filter_type == SWITCHABLE)
4095 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
4097 newbest = (tmp_rd < tmp_best_rd);
4099 tmp_best_filter = mbmi->interp_filter;
4100 tmp_best_rd = tmp_rd;
4102 if ((newbest && cm->mcomp_filter_type == SWITCHABLE) ||
4103 (mbmi->interp_filter == cm->mcomp_filter_type &&
4104 cm->mcomp_filter_type != SWITCHABLE)) {
4105 tmp_best_rdu = tmp_rd;
4106 tmp_best_rate = rate;
4107 tmp_best_ratey = rate_y;
4108 tmp_best_distortion = distortion;
4109 tmp_best_sse = total_sse;
4110 tmp_best_skippable = skippable;
4111 tmp_best_mbmode = *mbmi;
4112 for (i = 0; i < 4; i++) {
4113 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4114 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4117 if (switchable_filter_index == 0 &&
4118 cpi->sf.use_rd_breakout &&
4119 best_rd < INT64_MAX) {
4120 if (tmp_best_rdu / 2 > best_rd) {
4121 // skip searching the other filters if the first is
4122 // already substantially larger than the best so far
4123 tmp_best_filter = mbmi->interp_filter;
4124 tmp_best_rdu = INT64_MAX;
4129 } // switchable_filter_index loop
4133 if (tmp_best_rdu == INT64_MAX && pred_exists)
4136 mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ?
4137 tmp_best_filter : cm->mcomp_filter_type);
4138 vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common);
4140 // Handles the special case when a filter that is not in the
4141 // switchable list (bilinear, 6-tap) is indicated at the frame level
4142 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4143 &mbmi->ref_mvs[ref_frame][0],
4146 &rate, &rate_y, &distortion,
4147 &skippable, &total_sse,
4148 (int)this_rd_thresh, seg_mvs,
4151 if (tmp_rd == INT64_MAX)
4154 if (cpi->common.mcomp_filter_type == SWITCHABLE) {
4155 int rs = get_switchable_rate(x);
4156 tmp_best_rdu -= RDCOST(x->rdmult, x->rddiv, rs, 0);
4158 tmp_rd = tmp_best_rdu;
4159 total_sse = tmp_best_sse;
4160 rate = tmp_best_rate;
4161 rate_y = tmp_best_ratey;
4162 distortion = tmp_best_distortion;
4163 skippable = tmp_best_skippable;
4164 *mbmi = tmp_best_mbmode;
4165 for (i = 0; i < 4; i++)
4166 xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
4170 distortion2 += distortion;
4172 if (cpi->common.mcomp_filter_type == SWITCHABLE)
4173 rate2 += get_switchable_rate(x);
4176 mode_excluded = comp_pred
4177 ? cpi->common.reference_mode == SINGLE_REFERENCE
4178 : cpi->common.reference_mode == COMPOUND_REFERENCE;
4180 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4182 tmp_best_rdu = best_rd -
4183 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4184 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4186 if (tmp_best_rdu > 0) {
4187 // If even the 'Y' rd value of split is higher than best so far
4188 // then don't bother looking at UV
4189 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4191 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4192 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4193 if (rate_uv == INT_MAX)
4196 distortion2 += distortion_uv;
4197 skippable = skippable && uv_skippable;
4198 total_sse += uv_sse;
4200 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4201 for (i = 0; i < TX_MODES; ++i)
4202 tx_cache[i] = tx_cache[ONLY_4X4];
4206 if (cpi->common.reference_mode == REFERENCE_MODE_SELECT)
4207 rate2 += compmode_cost;
4209 // Estimate the reference frame signaling cost and add it
4210 // to the rolling cost variable.
4211 if (second_ref_frame > INTRA_FRAME) {
4212 rate2 += ref_costs_comp[ref_frame];
4214 rate2 += ref_costs_single[ref_frame];
4217 if (!disable_skip) {
4218 // Test for the condition where skip block will be activated
4219 // because there are no non zero coefficients and make any
4220 // necessary adjustment for rate. Ignore if skip is coded at
4221 // segment level as the cost won't have been added in.
4222 // Is Mb level skip allowed (i.e. not coded at segment level).
4223 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4226 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4227 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4228 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4229 // Add in the cost of the no skip flag.
4230 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4232 // FIXME(rbultje) make this work for splitmv also
4233 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4234 distortion2 = total_sse;
4235 assert(total_sse >= 0);
4236 rate2 -= (rate_y + rate_uv);
4241 } else if (mb_skip_allowed) {
4242 // Add in the cost of the no skip flag.
4243 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4246 // Calculate the final RD estimate for this mode.
4247 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4250 // Keep record of best inter rd with single reference
4251 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
4252 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
4254 this_rd < best_inter_rd) {
4255 best_inter_rd = this_rd;
4256 best_inter_ref_frame = ref_frame;
4259 if (!disable_skip && ref_frame == INTRA_FRAME) {
4260 for (i = 0; i < REFERENCE_MODES; ++i)
4261 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4262 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4263 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4266 // Did this mode help.. i.e. is it the new best mode
4267 if (this_rd < best_rd || x->skip) {
4268 if (!mode_excluded) {
4269 int max_plane = MAX_MB_PLANE;
4270 // Note index of best mode so far
4271 best_mode_index = mode_index;
4273 if (ref_frame == INTRA_FRAME) {
4274 /* required for left and above block mv */
4275 mbmi->mv[0].as_int = 0;
4279 *returnrate = rate2;
4280 *returndistortion = distortion2;
4282 best_yrd = best_rd -
4283 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4284 best_mbmode = *mbmi;
4285 best_skip2 = this_skip2;
4286 if (!x->select_txfm_size)
4287 swap_block_ptr(x, ctx, max_plane);
4288 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4289 sizeof(uint8_t) * ctx->num_4x4_blk);
4291 for (i = 0; i < 4; i++)
4292 best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4294 // TODO(debargha): enhance this test with a better distortion prediction
4295 // based on qp, activity mask and history
4296 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4297 (mode_index > MIN_EARLY_TERM_INDEX)) {
4298 const int qstep = xd->plane[0].dequant[1];
4299 // TODO(debargha): Enhance this by specializing for each mode_index
4301 if (x->source_variance < UINT_MAX) {
4302 const int var_adjust = (x->source_variance < 16);
4303 scale -= var_adjust;
4305 if (ref_frame > INTRA_FRAME &&
4306 distortion2 * scale < qstep * qstep) {
4313 /* keep record of best compound/single-only prediction */
4314 if (!disable_skip && ref_frame != INTRA_FRAME) {
4315 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4317 if (cpi->common.reference_mode == REFERENCE_MODE_SELECT) {
4318 single_rate = rate2 - compmode_cost;
4319 hybrid_rate = rate2;
4321 single_rate = rate2;
4322 hybrid_rate = rate2 + compmode_cost;
4325 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4326 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4328 if (second_ref_frame <= INTRA_FRAME &&
4329 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4330 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4331 } else if (second_ref_frame > INTRA_FRAME &&
4332 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4333 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4335 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4336 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4339 /* keep record of best filter type */
4340 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4341 cm->mcomp_filter_type != BILINEAR) {
4342 int64_t ref = cpi->rd_filter_cache[cm->mcomp_filter_type == SWITCHABLE ?
4343 SWITCHABLE_FILTERS : cm->mcomp_filter_type];
4345 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4346 if (ref == INT64_MAX)
4348 else if (cpi->rd_filter_cache[i] == INT64_MAX)
4349 // when early termination is triggered, the encoder does not have
4350 // access to the rate-distortion cost. it only knows that the cost
4351 // should be above the maximum valid value. hence it takes the known
4352 // maximum plus an arbitrary constant as the rate-distortion cost.
4353 adj_rd = cpi->mask_filter_rd - ref + 10;
4355 adj_rd = cpi->rd_filter_cache[i] - ref;
4358 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4362 /* keep record of best txfm size */
4363 if (bsize < BLOCK_32X32) {
4364 if (bsize < BLOCK_16X16) {
4365 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4366 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4368 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4370 if (!mode_excluded && this_rd != INT64_MAX) {
4371 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4372 int64_t adj_rd = INT64_MAX;
4373 if (ref_frame > INTRA_FRAME)
4374 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4378 if (adj_rd < best_tx_rd[i])
4379 best_tx_rd[i] = adj_rd;
4386 if (x->skip && !comp_pred)
4390 if (best_rd >= best_rd_so_far)
4393 // If we used an estimate for the uv intra rd in the loop above...
4394 if (cpi->sf.use_uv_intra_rd_estimate) {
4395 // Do Intra UV best rd mode selection if best mode choice above was intra.
4396 if (vp9_ref_order[best_mode_index].ref_frame == INTRA_FRAME) {
4398 *mbmi = best_mbmode;
4399 uv_tx_size = get_uv_tx_size(mbmi);
4400 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4401 &rate_uv_tokenonly[uv_tx_size],
4402 &dist_uv[uv_tx_size],
4403 &skip_uv[uv_tx_size],
4404 BLOCK_8X8, uv_tx_size);
4408 // If we are using reference masking and the set mask flag is set then
4409 // create the reference frame mask.
4410 if (cpi->sf.reference_masking && cpi->set_ref_frame_mask)
4411 cpi->ref_frame_mask = ~(1 << vp9_ref_order[best_mode_index].ref_frame);
4413 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4414 *returnrate = INT_MAX;
4415 *returndistortion = INT_MAX;
4419 assert((cm->mcomp_filter_type == SWITCHABLE) ||
4420 (cm->mcomp_filter_type == best_mbmode.interp_filter) ||
4421 !is_inter_block(&best_mbmode));
4423 // Updating rd_thresh_freq_fact[] here means that the different
4424 // partition/block sizes are handled independently based on the best
4425 // choice for the current partition. It may well be better to keep a scaled
4426 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4427 // combination that wins out.
4428 if (cpi->sf.adaptive_rd_thresh) {
4429 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4430 if (mode_index == best_mode_index) {
4431 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -=
4432 (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3);
4434 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC;
4435 if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >
4436 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
4437 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] =
4438 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
4445 *mbmi = best_mbmode;
4446 x->skip |= best_skip2;
4447 if (!is_inter_block(&best_mbmode)) {
4448 for (i = 0; i < 4; i++)
4449 xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4451 for (i = 0; i < 4; ++i)
4452 vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4454 mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
4455 mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
4458 for (i = 0; i < REFERENCE_MODES; ++i) {
4459 if (best_pred_rd[i] == INT64_MAX)
4460 best_pred_diff[i] = INT_MIN;
4462 best_pred_diff[i] = best_rd - best_pred_rd[i];
4466 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4467 if (best_filter_rd[i] == INT64_MAX)
4468 best_filter_diff[i] = 0;
4470 best_filter_diff[i] = best_rd - best_filter_rd[i];
4472 if (cm->mcomp_filter_type == SWITCHABLE)
4473 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4475 vp9_zero(best_filter_diff);
4479 for (i = 0; i < TX_MODES; i++) {
4480 if (best_tx_rd[i] == INT64_MAX)
4481 best_tx_diff[i] = 0;
4483 best_tx_diff[i] = best_rd - best_tx_rd[i];
4486 vp9_zero(best_tx_diff);
4489 set_scale_factors(cm, xd, mbmi->ref_frame[0] - 1, mbmi->ref_frame[1] - 1);
4490 store_coding_context(x, ctx, best_mode_index,
4491 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4492 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4493 mbmi->ref_frame[1]][0],
4494 best_pred_diff, best_tx_diff, best_filter_diff);