2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
16 #include "vp9/common/vp9_pragmas.h"
17 #include "vp9/encoder/vp9_tokenize.h"
18 #include "vp9/encoder/vp9_treewriter.h"
19 #include "vp9/encoder/vp9_onyx_int.h"
20 #include "vp9/common/vp9_entropymode.h"
21 #include "vp9/common/vp9_reconinter.h"
22 #include "vp9/common/vp9_reconintra.h"
23 #include "vp9/common/vp9_quant_common.h"
24 #include "vp9/encoder/vp9_encodemb.h"
25 #include "vp9/encoder/vp9_quantize.h"
26 #include "vp9/encoder/vp9_variance.h"
27 #include "vp9/encoder/vp9_mcomp.h"
28 #include "vp9/encoder/vp9_rdopt.h"
29 #include "vp9/encoder/vp9_ratectrl.h"
30 #include "vpx_mem/vpx_mem.h"
31 #include "vp9/common/vp9_systemdependent.h"
32 #include "vp9/encoder/vp9_encodemv.h"
33 #include "vp9/common/vp9_seg_common.h"
34 #include "vp9/common/vp9_pred_common.h"
35 #include "vp9/common/vp9_entropy.h"
36 #include "./vp9_rtcd.h"
37 #include "vp9/common/vp9_mvref_common.h"
38 #include "vp9/common/vp9_common.h"
/* Factor to weigh the rate for switchable interp filters */
#define SWITCHABLE_INTERP_RATE_FACTOR 1
// Per-reference-frame masks over vp9_mode_order entries: a set bit disables
// that mode when the corresponding reference frame is being searched.
#define LAST_FRAME_MODE_MASK    0xFFEDCD60
#define GOLDEN_FRAME_MODE_MASK  0xFFDA3BB0
#define ALT_REF_MODE_MASK       0xFFC648D0

// Earliest mode index at which early termination may trigger.
#define MIN_EARLY_TERM_INDEX    3
50 MB_PREDICTION_MODE mode;
51 MV_REFERENCE_FRAME ref_frame[2];
55 MV_REFERENCE_FRAME ref_frame[2];
58 struct rdcost_block_args {
60 ENTROPY_CONTEXT t_above[16];
61 ENTROPY_CONTEXT t_left[16];
74 const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
75 {NEARESTMV, {LAST_FRAME, NONE}},
76 {NEARESTMV, {ALTREF_FRAME, NONE}},
77 {NEARESTMV, {GOLDEN_FRAME, NONE}},
79 {DC_PRED, {INTRA_FRAME, NONE}},
81 {NEWMV, {LAST_FRAME, NONE}},
82 {NEWMV, {ALTREF_FRAME, NONE}},
83 {NEWMV, {GOLDEN_FRAME, NONE}},
85 {NEARMV, {LAST_FRAME, NONE}},
86 {NEARMV, {ALTREF_FRAME, NONE}},
87 {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
88 {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
90 {TM_PRED, {INTRA_FRAME, NONE}},
92 {NEARMV, {LAST_FRAME, ALTREF_FRAME}},
93 {NEWMV, {LAST_FRAME, ALTREF_FRAME}},
94 {NEARMV, {GOLDEN_FRAME, NONE}},
95 {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
96 {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
98 {ZEROMV, {LAST_FRAME, NONE}},
99 {ZEROMV, {GOLDEN_FRAME, NONE}},
100 {ZEROMV, {ALTREF_FRAME, NONE}},
101 {ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
102 {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
104 {H_PRED, {INTRA_FRAME, NONE}},
105 {V_PRED, {INTRA_FRAME, NONE}},
106 {D135_PRED, {INTRA_FRAME, NONE}},
107 {D207_PRED, {INTRA_FRAME, NONE}},
108 {D153_PRED, {INTRA_FRAME, NONE}},
109 {D63_PRED, {INTRA_FRAME, NONE}},
110 {D117_PRED, {INTRA_FRAME, NONE}},
111 {D45_PRED, {INTRA_FRAME, NONE}},
114 const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
115 {{LAST_FRAME, NONE}},
116 {{GOLDEN_FRAME, NONE}},
117 {{ALTREF_FRAME, NONE}},
118 {{LAST_FRAME, ALTREF_FRAME}},
119 {{GOLDEN_FRAME, ALTREF_FRAME}},
120 {{INTRA_FRAME, NONE}},
123 // The baseline rd thresholds for breaking out of the rd loop for
124 // certain modes are assumed to be based on 8x8 blocks.
125 // This table is used to correct for blocks size.
126 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
127 static int rd_thresh_block_size_factor[BLOCK_SIZES] =
128 {2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32};
130 static int raster_block_offset(BLOCK_SIZE plane_bsize,
131 int raster_block, int stride) {
132 const int bw = b_width_log2(plane_bsize);
133 const int y = 4 * (raster_block >> bw);
134 const int x = 4 * (raster_block & ((1 << bw) - 1));
135 return y * stride + x;
137 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
138 int raster_block, int16_t *base) {
139 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
140 return base + raster_block_offset(plane_bsize, raster_block, stride);
// Precomputes bit costs for keyframe/interframe intra Y modes, UV modes and
// switchable interpolation filters from the current frame-context probability
// tables, storing them in the MACROBLOCK cost arrays.
// NOTE(review): this listing is truncated — the local declarations
// (e.g. `int i, j;`) and closing braces from the original file are missing
// between the visible lines; do not treat this span as compilable as-is.
143 static void fill_mode_costs(VP9_COMP *cpi) {
144 VP9_COMMON *const cm = &cpi->common;
145 MACROBLOCK *const x = &cpi->mb;
146 FRAME_CONTEXT *const fc = &cm->fc;
// Keyframe Y-mode costs are conditioned on the above/left neighbor modes.
149 for (i = 0; i < INTRA_MODES; i++)
150 for (j = 0; j < INTRA_MODES; j++)
151 vp9_cost_tokens((int *)x->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
152 vp9_intra_mode_tree);
154 // TODO(rbultje) separate tables for superblock costing?
155 vp9_cost_tokens(x->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
156 vp9_cost_tokens(x->intra_uv_mode_cost[1],
157 fc->uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
158 vp9_cost_tokens(x->intra_uv_mode_cost[0],
159 vp9_kf_uv_mode_prob[INTRA_MODES - 1], vp9_intra_mode_tree);
161 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
162 vp9_cost_tokens((int *)x->switchable_interp_costs[i],
163 fc->switchable_interp_prob[i],
164 vp9_switchable_interp_tree);
// Expands the model coefficient probabilities into full token probabilities
// and precomputes token bit costs for every transform size, plane type,
// reference type, band and context, for both the "with EOB" and "skip EOB"
// cases.
// NOTE(review): truncated listing — loop-variable declarations and the
// trailing arguments of the vp9_cost_tokens* calls (the tree parameters)
// are missing between the visible lines.
167 static void fill_token_costs(vp9_coeff_cost *c,
168 vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
171 for (t = TX_4X4; t <= TX_32X32; ++t)
172 for (i = 0; i < PLANE_TYPES; ++i)
173 for (j = 0; j < REF_TYPES; ++j)
174 for (k = 0; k < COEF_BANDS; ++k)
175 for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
176 vp9_prob probs[ENTROPY_NODES];
177 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
178 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
180 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
// Both tables must agree on the cost of the EOB token.
182 assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
183 c[t][i][j][k][1][l][EOB_TOKEN]);
// Weighting applied to the rd multiplier as a function of the two-pass
// intra/inter ratio (next_iiratio); indices >= 5 contribute nothing.
// NOTE(review): the closing brace of this initializer was lost in the
// listing; restored here.
static const int rd_iifactor[32] = {
  4, 4, 3, 2, 1, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
};
194 // 3* dc_qlookup[Q]*dc_qlookup[Q];
196 /* values are now correlated to quantizer */
197 static int sad_per_bit16lut[QINDEX_RANGE];
198 static int sad_per_bit4lut[QINDEX_RANGE];
200 void vp9_init_me_luts() {
203 // Initialize the sad lut tables using a formulaic calculation for now
204 // This is to make it easier to resolve the impact of experimental changes
205 // to the quantizer tables.
206 for (i = 0; i < QINDEX_RANGE; i++) {
207 const double q = vp9_convert_qindex_to_q(i);
208 sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
209 sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
213 int vp9_compute_rd_mult(VP9_COMP *cpi, int qindex) {
214 const int q = vp9_dc_quant(qindex, 0);
215 // TODO(debargha): Adjust the function below
216 int rdmult = 88 * q * q / 25;
217 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
218 if (cpi->twopass.next_iiratio > 31)
219 rdmult += (rdmult * rd_iifactor[31]) >> 4;
221 rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
226 static int compute_rd_thresh_factor(int qindex) {
228 // TODO(debargha): Adjust the function below
229 q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
235 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
236 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
237 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
// Fills the per-segment, per-block-size rd pruning thresholds for whole-block
// modes (rd_threshes) and sub-8x8 references (rd_thresh_sub8x8) by scaling
// the speed-feature multipliers with a qindex- and block-size-dependent
// factor.
// NOTE(review): truncated listing — the clamp bounds, the ternary else-arms
// (presumably INT_MAX for overflow protection) and closing braces are
// missing between the visible lines.
240 static void set_block_thresholds(VP9_COMP *cpi) {
241 int i, bsize, segment_id;
242 VP9_COMMON *cm = &cpi->common;
243 SPEED_FEATURES *sf = &cpi->sf;
245 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
246 const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
247 cm->base_qindex) + cm->y_dc_delta_q,
249 const int q = compute_rd_thresh_factor(qindex);
251 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
252 // Threshold here seems unnecessarily harsh but fine given actual
253 // range of values used for cpi->sf.thresh_mult[].
254 const int t = q * rd_thresh_block_size_factor[bsize];
255 const int thresh_max = INT_MAX / t;
257 for (i = 0; i < MAX_MODES; ++i)
258 cpi->rd_threshes[segment_id][bsize][i] =
259 sf->thresh_mult[i] < thresh_max ? sf->thresh_mult[i] * t / 4
262 for (i = 0; i < MAX_REFS; ++i) {
263 cpi->rd_thresh_sub8x8[segment_id][bsize][i] =
264 sf->thresh_mult_sub8x8[i] < thresh_max
265 ? sf->thresh_mult_sub8x8[i] * t / 4
// Per-frame initialization of all rate-distortion constants and cost tables:
// RDMULT/RDDIV, error-per-bit, speed features, block thresholds, token and
// mode costs, and (for inter frames) motion-vector and inter-mode costs.
// NOTE(review): truncated listing — local declarations (qindex, i) and
// several closing braces/arguments are missing between the visible lines.
272 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
273 VP9_COMMON *cm = &cpi->common;
274 MACROBLOCK *x = &cpi->mb;
277 vp9_clear_system_state(); // __asm emms;
279 // Further tests required to see if optimum is different
280 // for key frames, golden frames and arf frames.
281 // if (cpi->common.refresh_golden_frame ||
282 // cpi->common.refresh_alt_ref_frame)
283 qindex = clamp(cm->base_qindex + cm->y_dc_delta_q, 0, MAXQ);
285 cpi->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
286 cpi->RDMULT = vp9_compute_rd_mult(cpi, qindex);
// Guard against a zero error-per-bit (division-by-zero downstream).
288 x->errorperbit = cpi->RDMULT / RD_MULT_EPB_RATIO;
289 x->errorperbit += (x->errorperbit == 0);
291 vp9_set_speed_features(cpi);
293 x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
294 cm->frame_type != KEY_FRAME) ? 0 : 1;
296 set_block_thresholds(cpi);
298 fill_token_costs(x->token_costs, cm->fc.coef_probs);
300 if (!cpi->sf.use_pick_mode) {
301 for (i = 0; i < PARTITION_CONTEXTS; i++)
302 vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
305 fill_mode_costs(cpi);
307 if (!frame_is_intra_only(cm)) {
308 vp9_build_nmv_cost_table(x->nmvjointcost,
309 cm->allow_high_precision_mv ? x->nmvcost_hp
312 cm->allow_high_precision_mv, 1, 1);
314 for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
315 vp9_cost_tokens((int *)x->inter_mode_cost[i],
316 cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
// Upper bound on the normalized x^2 input (Q10) accepted by model_rd_norm;
// equals xsq_iq_q10[last] - 1 (asserted below).
321 static const int MAX_XSQ_Q10 = 245727;
// Piecewise-linear interpolation of the normalized rate and distortion of a
// Laplacian source as a function of xsq_q10 = (qstep^2 * n / variance) in
// Q10. Outputs r_q10 (rate per sample, Q10) and d_q10 (normalized
// distortion, Q10).
// NOTE(review): truncated listing — array-terminating braces and some local
// declarations are missing between the visible lines.
323 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
324 // NOTE: The tables below must be of the same size
326 // The functions described below are sampled at the four most significant
327 // bits of x^2 + 8 / 256
330 // This table models the rate for a Laplacian source
331 // source with given variance when quantized with a uniform quantizer
332 // with given stepsize. The closed form expression is:
333 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
334 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
335 // and H(x) is the binary entropy function.
336 static const int rate_tab_q10[] = {
337 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
338 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
339 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
340 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
341 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
342 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
343 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
344 1159, 1086, 1021, 963, 911, 864, 821, 781,
345 745, 680, 623, 574, 530, 490, 455, 424,
346 395, 345, 304, 269, 239, 213, 190, 171,
347 154, 126, 104, 87, 73, 61, 52, 44,
348 38, 28, 21, 16, 12, 10, 8, 6,
349 5, 3, 2, 1, 1, 1, 0, 0,
351 // Normalized distortion
352 // This table models the normalized distortion for a Laplacian source
353 // source with given variance when quantized with a uniform quantizer
354 // with given stepsize. The closed form expression is:
355 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
356 // where x = qpstep / sqrt(variance)
357 // Note the actual distortion is Dn * variance.
358 static const int dist_tab_q10[] = {
359 0, 0, 1, 1, 1, 2, 2, 2,
360 3, 3, 4, 5, 5, 6, 7, 7,
361 8, 9, 11, 12, 13, 15, 16, 17,
362 18, 21, 24, 26, 29, 31, 34, 36,
363 39, 44, 49, 54, 59, 64, 69, 73,
364 78, 88, 97, 106, 115, 124, 133, 142,
365 151, 167, 184, 200, 215, 231, 245, 260,
366 274, 301, 327, 351, 375, 397, 418, 439,
367 458, 495, 528, 559, 587, 613, 637, 659,
368 680, 717, 749, 777, 801, 823, 842, 859,
369 874, 899, 919, 936, 949, 960, 969, 977,
370 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
371 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
// Sample points of x^2 (Q10) at which the two tables above are tabulated
// (non-uniform spacing: 4 samples per power-of-two interval).
373 static const int xsq_iq_q10[] = {
374 0, 4, 8, 12, 16, 20, 24, 28,
375 32, 40, 48, 56, 64, 72, 80, 88,
376 96, 112, 128, 144, 160, 176, 192, 208,
377 224, 256, 288, 320, 352, 384, 416, 448,
378 480, 544, 608, 672, 736, 800, 864, 928,
379 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
380 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
381 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
382 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
383 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
384 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
385 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
386 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
389 static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
390 assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
391 assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
392 assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
// Locate the interpolation interval: k selects the power-of-two range,
// the low 3 bits of the shifted value select the sample within it.
394 int tmp = (xsq_q10 >> 2) + 8;
395 int k = get_msb(tmp) - 3;
396 int xq = (k << 3) + ((tmp >> k) & 0x7);
397 const int one_q10 = 1 << 10;
// a_q10/b_q10 are the Q10 interpolation weights within [xq, xq+1].
398 const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
399 const int b_q10 = one_q10 - a_q10;
400 *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
401 *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
// Models rate (in bits, implicitly scaled) and distortion for n samples of a
// Laplacian source with the given variance quantized with step qstep, via
// the model_rd_norm interpolation tables.
// NOTE(review): truncated listing — the *dist parameter in the signature,
// local declarations (r_q10, d_q10) and the var == 0 early-out from the
// original are missing between the visible lines.
404 static void model_rd_from_var_lapndz(unsigned int var, unsigned int n,
405 unsigned int qstep, int *rate,
407 // This function models the rate and distortion for a Laplacian
408 // source with given variance when quantized with a uniform quantizer
409 // with given stepsize. The closed form expressions are in:
410 // Hang and Chen, "Source Model for transform video coder and its
411 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
412 // Sys. for Video Tech., April 1997.
// xsq = qstep^2 * n / var, rounded, in Q10; clamped to the table range.
418 uint64_t xsq_q10_64 =
419 ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
420 int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ? MAX_XSQ_Q10 : xsq_q10_64;
421 model_rd_norm(xsq_q10, &r_q10, &d_q10);
422 *rate = (n * r_q10 + 2) >> 2;
423 *dist = (var * (int64_t)d_q10 + 512) >> 10;
// Estimates rate and distortion for a whole superblock by measuring the SSE
// of the current prediction against the source per plane and feeding it to
// the Laplacian rd model (or using SSE directly in pick-mode).
// NOTE(review): truncated listing — local declarations (sse, rate, dist),
// the rate_sum accumulation and closing braces are missing between the
// visible lines.
427 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
428 MACROBLOCK *x, MACROBLOCKD *xd,
429 int *out_rate_sum, int64_t *out_dist_sum) {
430 // Note our transform coeffs are 8 times an orthogonal transform.
431 // Hence quantizer step is also 8 times. To get effective quantizer
432 // we need to divide by 8 before sending to modeling function.
433 int i, rate_sum = 0, dist_sum = 0;
434 int ref = xd->mi_8x8[0]->mbmi.ref_frame[0];
437 for (i = 0; i < MAX_MB_PLANE; ++i) {
438 struct macroblock_plane *const p = &x->plane[i];
439 struct macroblockd_plane *const pd = &xd->plane[i];
440 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
442 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
443 pd->dst.buf, pd->dst.stride, &sse);
446 x->pred_sse[ref] = sse;
447 if (cpi->sf.use_pick_mode) {
448 dist_sum += (int)sse;
452 model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
453 pd->dequant[1] >> 3, &rate, &dist);
455 dist_sum += (int)dist;
// Distortion is returned at SSE scale (<< 4 undoes the model's scaling).
459 *out_rate_sum = rate_sum;
460 *out_dist_sum = (int64_t)dist_sum << 4;
// Estimates luma rate/distortion for one candidate transform size by
// evaluating the Laplacian model per transform-sized sub-block and
// accumulating; also derives a skip flag from the per-block model rates.
// NOTE(review): truncated listing — the tx_size/out_skip parameters, the
// per-branch `bs = BLOCK_NxN` assignments inside the if/else chain, local
// declarations and closing braces are missing between the visible lines.
463 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
465 MACROBLOCK *x, MACROBLOCKD *xd,
466 int *out_rate_sum, int64_t *out_dist_sum,
470 struct macroblock_plane *const p = &x->plane[0];
471 struct macroblockd_plane *const pd = &xd->plane[0];
472 const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
473 const int height = 4 * num_4x4_blocks_high_lookup[bsize];
475 int64_t dist_sum = 0;
476 const int t = 4 << tx_size;
478 if (tx_size == TX_4X4) {
480 } else if (tx_size == TX_8X8) {
482 } else if (tx_size == TX_16X16) {
484 } else if (tx_size == TX_32X32) {
// Walk the block in transform-sized tiles.
491 for (j = 0; j < height; j += t) {
492 for (k = 0; k < width; k += t) {
496 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
497 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
499 // sse works better than var, since there is no dc prediction used
500 model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist);
503 *out_skip &= (rate < 1024);
507 *out_rate_sum = rate_sum;
508 *out_dist_sum = dist_sum << 4;
// Computes the sum of squared errors between the transform coefficients and
// their dequantized values, and writes the sum of squared source
// coefficients to *ssz. Returns the squared error.
// NOTE(review): the loop-variable declaration, *ssz store, return statement
// and closing braces were lost in the corrupted listing; restored here.
int64_t vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    int this_diff = coeff[i] - dqcoeff[i];
    // Unsigned multiply avoids signed-overflow UB on extreme int16 products.
    error += (unsigned)this_diff * this_diff;
    sqcoeff += (unsigned) coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
526 /* The trailing '0' is a terminator which is used inside cost_coeffs() to
527 * decide whether to include cost of a trailing EOB node or not (i.e. we
528 * can skip this if the last coefficient in this transform block, e.g. the
529 * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block,
531 static const int16_t band_counts[TX_SIZES][8] = {
532 { 1, 2, 3, 4, 3, 16 - 13, 0 },
533 { 1, 2, 3, 4, 11, 64 - 21, 0 },
534 { 1, 2, 3, 4, 11, 256 - 21, 0 },
535 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
// Computes the token (entropy-coding) bit cost of one quantized transform
// block, walking the coefficients in scan order and using the precomputed
// token-cost tables conditioned on band and coefficient context.
// NOTE(review): truncated listing — the tx_size parameter, the eob == 0
// branch structure, the in-loop `v = qcoeff_ptr[rc]` load, band bookkeeping
// and the final return are missing between the visible lines.
538 static INLINE int cost_coeffs(MACROBLOCK *x,
539 int plane, int block,
540 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
542 const int16_t *scan, const int16_t *nb) {
543 MACROBLOCKD *const xd = &x->e_mbd;
544 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
545 struct macroblock_plane *p = &x->plane[plane];
546 struct macroblockd_plane *pd = &xd->plane[plane];
547 const PLANE_TYPE type = pd->plane_type;
// Skip band_counts[..][0]: the DC coefficient is handled separately below.
548 const int16_t *band_count = &band_counts[tx_size][1];
549 const int eob = p->eobs[block];
550 const int16_t *const qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
551 const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
552 unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
553 x->token_costs[tx_size][type][ref];
554 const ENTROPY_CONTEXT above_ec = !!*A, left_ec = !!*L;
555 uint8_t *p_tok = x->token_cache;
556 int pt = combine_entropy_contexts(above_ec, left_ec);
559 // Check for consistency of tx_size with mode info
560 assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
561 : get_uv_tx_size(mbmi) == tx_size);
// Empty block: only the EOB token is coded.
565 cost = token_costs[0][0][pt][EOB_TOKEN];
568 int band_left = *band_count++;
// DC (first) coefficient uses the neighbor-derived context directly.
571 int v = qcoeff_ptr[0];
572 int prev_t = vp9_dct_value_tokens_ptr[v].token;
573 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
574 p_tok[0] = vp9_pt_energy_class[prev_t];
// AC coefficients: context comes from previously coded token energies.
578 for (c = 1; c < eob; c++) {
579 const int rc = scan[c];
583 t = vp9_dct_value_tokens_ptr[v].token;
584 pt = get_coef_context(nb, p_tok, c);
585 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
586 p_tok[rc] = vp9_pt_energy_class[t];
589 band_left = *band_count++;
// Add the trailing EOB token cost (unless the block ends at the last
// possible coefficient — see the band_counts terminator comment above).
596 pt = get_coef_context(nb, p_tok, c);
597 cost += (*token_costs)[0][pt][EOB_TOKEN];
601 // is eob first coefficient;
// Computes the distortion (reconstruction error) and SSE for one transform
// block from its transform coefficients; when intra skip-encode is active,
// adds a quantizer-derived correction term to model the unencoded error.
// NOTE(review): truncated listing — the this_sse local declaration, the
// trailing &this_sse argument / shift on the vp9_block_error result, and
// closing braces are missing between the visible lines.
607 static void dist_block(int plane, int block, TX_SIZE tx_size,
608 struct rdcost_block_args* args) {
609 const int ss_txfrm_size = tx_size << 1;
610 MACROBLOCK* const x = args->x;
611 MACROBLOCKD* const xd = &x->e_mbd;
612 struct macroblock_plane *const p = &x->plane[plane];
613 struct macroblockd_plane *const pd = &xd->plane[plane];
// TX_32X32 coefficients carry 2 fewer fractional bits than smaller sizes.
615 int shift = tx_size == TX_32X32 ? 0 : 2;
616 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
617 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
618 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
620 args->sse = this_sse >> shift;
622 if (x->skip_encode && !is_inter_block(&xd->mi_8x8[0]->mbmi)) {
623 // TODO(jingning): tune the model to better capture the distortion.
624 int64_t p = (pd->dequant[1] * pd->dequant[1] *
625 (1 << ss_txfrm_size)) >> (shift + 2);
626 args->dist += (p >> 4);
// Computes the token cost of one transform block via cost_coeffs, using the
// entropy contexts at the block's raster position within the plane.
// NOTE(review): truncated listing — the x_idx/y_idx declarations and the
// closing brace are missing between the visible lines.
631 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
632 TX_SIZE tx_size, struct rdcost_block_args* args) {
634 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
636 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
637 args->t_left + y_idx, tx_size,
638 args->so->scan, args->so->neighbors);
// Per-transform-block callback for vp9_foreach_transformed_block_in_plane:
// encodes (intra) or transforms+quantizes (inter) the block, accumulates its
// rate/distortion/SSE into the shared rdcost_block_args, and records whether
// zeroing the block would be cheaper (zcoeff_blk).
// NOTE(review): truncated listing — the early-skip check, the else branch of
// the intra/inter split, the best_rd early-termination body and closing
// braces are missing between the visible lines.
641 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
642 TX_SIZE tx_size, void *arg) {
643 struct rdcost_block_args *args = arg;
644 MACROBLOCK *const x = args->x;
645 MACROBLOCKD *const xd = &x->e_mbd;
646 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
647 int64_t rd1, rd2, rd;
652 if (!is_inter_block(mbmi))
653 vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
655 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
657 dist_block(plane, block, tx_size, args);
658 rate_block(plane, block, plane_bsize, tx_size, args);
// rd1: cost of coding the coefficients; rd2: cost of zeroing the block.
659 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
660 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
662 // TODO(jingning): temporarily enabled only for luma component
665 x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
666 (rd1 > rd2 && !xd->lossless);
668 args->this_rate += args->rate;
669 args->this_dist += args->dist;
670 args->this_sse += args->sse;
673 if (args->this_rd > args->best_rd) {
// Collapses the per-4x4 above/left entropy contexts down to one context per
// transform block of the given size, writing into t_above/t_left.
// NOTE(review): truncated listing — the switch(tx_size)/case skeleton, the
// loop-variable declaration and closing braces are missing; the visible
// bodies correspond to the TX_4X4 (memcpy), TX_8X8, TX_16X16 and TX_32X32
// cases respectively, ending in the default: assert.
// NOTE(review): the uint16/32/64 pointer casts read multiple ENTROPY_CONTEXT
// bytes at once — presumably the arrays are suitably sized/aligned; this
// type-punning predates strict-aliasing cleanups in upstream libvpx.
679 void vp9_get_entropy_contexts(TX_SIZE tx_size,
680 ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16],
681 const ENTROPY_CONTEXT *above, const ENTROPY_CONTEXT *left,
682 int num_4x4_w, int num_4x4_h) {
686 vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
687 vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
690 for (i = 0; i < num_4x4_w; i += 2)
691 t_above[i] = !!*(const uint16_t *)&above[i];
692 for (i = 0; i < num_4x4_h; i += 2)
693 t_left[i] = !!*(const uint16_t *)&left[i];
696 for (i = 0; i < num_4x4_w; i += 4)
697 t_above[i] = !!*(const uint32_t *)&above[i];
698 for (i = 0; i < num_4x4_h; i += 4)
699 t_left[i] = !!*(const uint32_t *)&left[i];
702 for (i = 0; i < num_4x4_w; i += 8)
703 t_above[i] = !!*(const uint64_t *)&above[i];
704 for (i = 0; i < num_4x4_h; i += 8)
705 t_left[i] = !!*(const uint64_t *)&left[i];
708 assert(0 && "Invalid transform size.");
// Measures rate, distortion, SSE and skippability of one plane for a fixed
// transform size by running block_rd_txfm over every transform block in the
// plane (with early termination against ref_best_rd).
// NOTE(review): truncated listing — the args.x assignment, the exit_early
// check guarding the INT64_MAX outputs, and closing braces are missing
// between the visible lines.
712 static void txfm_rd_in_plane(MACROBLOCK *x,
713 int *rate, int64_t *distortion,
714 int *skippable, int64_t *sse,
715 int64_t ref_best_rd, int plane,
716 BLOCK_SIZE bsize, TX_SIZE tx_size) {
717 MACROBLOCKD *const xd = &x->e_mbd;
718 struct macroblockd_plane *const pd = &xd->plane[plane];
719 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
720 const int num_4x4_w = num_4x4_blocks_wide_lookup[bs];
721 const int num_4x4_h = num_4x4_blocks_high_lookup[bs];
722 struct rdcost_block_args args = { 0 };
724 args.best_rd = ref_best_rd;
727 xd->mi_8x8[0]->mbmi.tx_size = tx_size;
729 vp9_get_entropy_contexts(tx_size, args.t_above, args.t_left,
730 pd->above_context, pd->left_context,
731 num_4x4_w, num_4x4_h);
733 args.so = get_scan(xd, tx_size, pd->plane_type, 0);
735 vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
736 block_rd_txfm, &args);
// Early-terminated search: report unusable (infinite) distortion.
739 *distortion = INT64_MAX;
743 *distortion = args.this_dist;
744 *rate = args.this_rate;
745 *sse = args.this_sse;
746 *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
// Selects the largest transform size permitted by the frame's tx_mode and
// the block size, then evaluates luma rate/distortion at that single size.
// NOTE(review): truncated listing — the ref_best_rd/bs parameters in the
// signature, the final mbmi->tx_size argument of txfm_rd_in_plane and the
// closing brace are missing between the visible lines.
750 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
751 int *rate, int64_t *distortion,
752 int *skip, int64_t *sse,
755 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
756 VP9_COMMON *const cm = &cpi->common;
757 const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
758 MACROBLOCKD *const xd = &x->e_mbd;
759 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
761 mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
763 txfm_rd_in_plane(x, rate, distortion, skip,
764 &sse[mbmi->tx_size], ref_best_rd, 0, bs,
766 cpi->tx_stepdown_count[0]++;
// Picks the best transform size by comparing true RD costs per size
// (column [0] = cost without tx-size signalling, column [1] = cost with it),
// sets mbmi->tx_size, outputs the winning rate/dist/skip and fills the
// per-tx-mode cost cache used by mode comparison.
// NOTE(review): truncated listing — the s/skip parameters, local
// declarations (n, m, s0, s1), the skip-cost (s1) alternatives in the rd
// computation, the best_tx bookkeeping inside the `rd[n][1] < best_rd`
// branch and several closing braces are missing between the visible lines.
769 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
770 int (*r)[2], int *rate,
771 int64_t *d, int64_t *distortion,
773 int64_t tx_cache[TX_MODES],
775 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
776 VP9_COMMON *const cm = &cpi->common;
777 MACROBLOCKD *const xd = &x->e_mbd;
778 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
779 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
780 int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
781 {INT64_MAX, INT64_MAX},
782 {INT64_MAX, INT64_MAX},
783 {INT64_MAX, INT64_MAX}};
786 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
787 int64_t best_rd = INT64_MAX;
788 TX_SIZE best_tx = TX_4X4;
790 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
791 assert(skip_prob > 0);
// s0/s1: bit costs of signalling skip = 0 / skip = 1.
792 s0 = vp9_cost_bit(skip_prob, 0);
793 s1 = vp9_cost_bit(skip_prob, 1);
795 for (n = TX_4X4; n <= max_tx_size; n++) {
// Add the cost of signalling transform size n to r[n][1].
797 if (r[n][0] < INT_MAX) {
798 for (m = 0; m <= n - (n == max_tx_size); m++) {
800 r[n][1] += vp9_cost_zero(tx_probs[m]);
802 r[n][1] += vp9_cost_one(tx_probs[m]);
805 if (d[n] == INT64_MAX) {
806 rd[n][0] = rd[n][1] = INT64_MAX;
808 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
810 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
811 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
814 if (rd[n][1] < best_rd) {
// In TX_MODE_SELECT the per-block winner is used; otherwise the mode's
// largest allowed size.
819 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
820 best_tx : MIN(max_tx_size, max_mode_tx_size);
823 *distortion = d[mbmi->tx_size];
824 *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
825 *skip = s[mbmi->tx_size];
827 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
828 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
829 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
830 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
832 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
833 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
834 cpi->tx_stepdown_count[0]++;
835 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
836 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
837 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
838 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
839 tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
840 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
842 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
843 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
// Picks the transform size using model-estimated RD costs (each scaled by a
// per-size fudge factor to bias toward larger transforms), then runs the
// real transform search once at the chosen size to produce actual
// rate/dist/skip outputs.
// NOTE(review): truncated listing — local declarations (n, m, s0, s1), the
// skip branch selecting between the s1 and s0 rd expressions, the best_tx
// bookkeeping and several closing braces are missing between the visible
// lines.
847 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
848 int (*r)[2], int *rate,
849 int64_t *d, int64_t *distortion,
850 int *s, int *skip, int64_t *sse,
853 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
854 VP9_COMMON *const cm = &cpi->common;
855 MACROBLOCKD *const xd = &x->e_mbd;
856 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
857 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
858 int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
859 {INT64_MAX, INT64_MAX},
860 {INT64_MAX, INT64_MAX},
861 {INT64_MAX, INT64_MAX}};
// Fudge factors favoring larger transforms (model rd is only approximate).
864 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
865 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
866 int64_t best_rd = INT64_MAX;
867 TX_SIZE best_tx = TX_4X4;
869 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
870 assert(skip_prob > 0);
871 s0 = vp9_cost_bit(skip_prob, 0);
872 s1 = vp9_cost_bit(skip_prob, 1);
874 for (n = TX_4X4; n <= max_tx_size; n++) {
875 double scale = scale_rd[n];
877 for (m = 0; m <= n - (n == max_tx_size); m++) {
879 r[n][1] += vp9_cost_zero(tx_probs[m]);
881 r[n][1] += vp9_cost_one(tx_probs[m]);
884 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]) * scale;
886 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]) * scale;
887 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]) * scale;
889 if (rd[n][1] < best_rd) {
895 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
896 best_tx : MIN(max_tx_size, max_mode_tx_size);
898 // Actually encode using the chosen mode if a model was used, but do not
899 // update the r, d costs
900 txfm_rd_in_plane(x, rate, distortion, skip,
901 &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size);
903 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
904 cpi->tx_stepdown_count[0]++;
905 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
906 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
907 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
908 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
910 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
// Luma superblock RD search dispatcher: depending on the tx-size search
// method in the speed features, either forces the largest transform,
// searches sizes via the model (inter blocks), or does the full per-size
// rd search, writing the winning rate/distortion/skip/sse and tx cache.
// NOTE(review): truncated listing — local declarations (tx_size), parts of
// the USE_LARGESTALL condition, an early return and closing braces are
// missing between the visible lines.
914 static void super_block_yrd(VP9_COMP *cpi,
915 MACROBLOCK *x, int *rate, int64_t *distortion,
916 int *skip, int64_t *psse, BLOCK_SIZE bs,
917 int64_t txfm_cache[TX_MODES],
918 int64_t ref_best_rd) {
919 int r[TX_SIZES][2], s[TX_SIZES];
920 int64_t d[TX_SIZES], sse[TX_SIZES];
921 MACROBLOCKD *xd = &x->e_mbd;
922 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
923 const int b_inter_mode = is_inter_block(mbmi);
924 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
928 assert(bs == mbmi->sb_type);
930 vp9_subtract_sby(x, bs);
932 if (cpi->sf.tx_size_search_method == USE_LARGESTALL ||
933 (cpi->sf.tx_size_search_method != USE_FULL_RD &&
935 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
936 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
939 *psse = sse[mbmi->tx_size];
943 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER &&
// Model-based size selection (cheap) for inter blocks.
945 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
946 model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
947 &r[tx_size][0], &d[tx_size], &s[tx_size]);
948 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
949 skip, sse, ref_best_rd, bs);
// Full RD search over every candidate transform size.
951 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
952 txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
953 &s[tx_size], &sse[tx_size],
954 ref_best_rd, 0, bs, tx_size);
955 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
956 skip, txfm_cache, bs);
959 *psse = sse[mbmi->tx_size];
962 static int conditional_skipintra(MB_PREDICTION_MODE mode,
963 MB_PREDICTION_MODE best_intra_mode) {
964 if (mode == D117_PRED &&
965 best_intra_mode != V_PRED &&
966 best_intra_mode != D135_PRED)
968 if (mode == D63_PRED &&
969 best_intra_mode != V_PRED &&
970 best_intra_mode != D45_PRED)
972 if (mode == D207_PRED &&
973 best_intra_mode != H_PRED &&
974 best_intra_mode != D45_PRED)
976 if (mode == D153_PRED &&
977 best_intra_mode != H_PRED &&
978 best_intra_mode != D135_PRED)
// Exhaustive RD search over the intra prediction modes of one sub-8x8 luma
// block: for each candidate mode it predicts, transforms, quantizes and
// reconstructs every 4x4 unit, accumulating rate and distortion, keeping the
// best mode (and its reconstructed pixels / entropy contexts), then restores
// the best reconstruction into the destination buffer.
// NOTE(review): truncated listing — the bmode_costs parameter, several local
// declarations (rate, ratey, distortion, this_rd, tx_type, t), early-break
// labels/gotos, the `*best_mode = mode;` bookkeeping and a number of closing
// braces are missing between the visible lines.
983 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
984 MB_PREDICTION_MODE *best_mode,
986 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
987 int *bestrate, int *bestratey,
988 int64_t *bestdistortion,
989 BLOCK_SIZE bsize, int64_t rd_thresh) {
990 MB_PREDICTION_MODE mode;
991 MACROBLOCKD *xd = &x->e_mbd;
992 int64_t best_rd = rd_thresh;
995 struct macroblock_plane *p = &x->plane[0];
996 struct macroblockd_plane *pd = &xd->plane[0];
997 const int src_stride = p->src.stride;
998 const int dst_stride = pd->dst.stride;
999 const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
1001 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
1003 int16_t *src_diff, *coeff;
1005 ENTROPY_CONTEXT ta[2], tempa[2];
1006 ENTROPY_CONTEXT tl[2], templ[2];
1008 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1009 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
// Holds the reconstruction of the best mode so it can be restored at exit.
1011 uint8_t best_dst[8 * 8];
1015 vpx_memcpy(ta, a, sizeof(ta));
1016 vpx_memcpy(tl, l, sizeof(tl));
1017 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
1019 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1023 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1026 // Only do the oblique modes if the best so far is
1027 // one of the neighboring directional modes
1028 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1029 if (conditional_skipintra(mode, *best_mode))
1033 rate = bmode_costs[mode];
// Work on copies of the entropy contexts; committed only if mode wins.
1036 vpx_memcpy(tempa, ta, sizeof(ta));
1037 vpx_memcpy(templ, tl, sizeof(tl));
1039 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1040 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1042 const scan_order *so;
1043 const uint8_t *src = src_init + idx * 4 + idy * 4 * src_stride;
1044 uint8_t *dst = dst_init + idx * 4 + idy * 4 * dst_stride;
1045 const int block = ib + idy * 2 + idx;
1047 xd->mi_8x8[0]->bmi[block].as_mode = mode;
1048 src_diff = raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
1049 coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1050 vp9_predict_intra_block(xd, block, 1,
1052 x->skip_encode ? src : dst,
1053 x->skip_encode ? src_stride : dst_stride,
1054 dst, dst_stride, idx, idy, 0);
1055 vp9_subtract_block(4, 4, src_diff, 8,
1059 tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1060 so = &vp9_scan_orders[TX_4X4][tx_type];
// Non-DCT_DCT types need the hybrid transform path.
1062 if (tx_type != DCT_DCT)
1063 vp9_fht4x4(src_diff, coeff, 8, tx_type);
1065 x->fwd_txm4x4(src_diff, coeff, 8);
1067 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1069 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1070 so->scan, so->neighbors);
1071 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
// Bail out of this mode as soon as it cannot beat the current best.
1073 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1076 if (tx_type != DCT_DCT)
1077 vp9_iht4x4_16_add(BLOCK_OFFSET(pd->dqcoeff, block),
1078 dst, pd->dst.stride, tx_type);
1080 xd->itxm_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, pd->dst.stride,
1086 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1088 if (this_rd < best_rd) {
1091 *bestdistortion = distortion;
1094 vpx_memcpy(a, tempa, sizeof(tempa));
1095 vpx_memcpy(l, templ, sizeof(templ));
1096 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1097 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1098 num_4x4_blocks_wide * 4);
1104 if (best_rd >= rd_thresh || x->skip_encode)
// Restore the best mode's reconstruction into the destination buffer.
1107 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1108 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1109 num_4x4_blocks_wide * 4);
// Picks the best luma intra mode for each sub-8x8 partition (4x4, 4x8 or
// 8x4) of an 8x8 block, accumulating rate and distortion across partitions.
// Returns the combined RD cost. NOTE(review): interior lines are elided in
// this view; comments describe only the visible code.
1114 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP * const cpi,
1115 MACROBLOCK * const mb,
1118 int64_t * const distortion,
1121 MACROBLOCKD *const xd = &mb->e_mbd;
1122 MODE_INFO *const mic = xd->mi_8x8[0];
1123 const MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
1124 const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
1125 const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type;
1126 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1127 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1130 int64_t total_distortion = 0;
1132 int64_t total_rd = 0;
1133 ENTROPY_CONTEXT t_above[4], t_left[4];
// Snapshot the entropy contexts so per-partition searches work on a copy.
1136 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1137 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1139 bmode_costs = mb->mbmode_cost;
1141 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1142 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1143 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1144 MB_PREDICTION_MODE best_mode = DC_PRED;
1145 int r = INT_MAX, ry = INT_MAX;
1146 int64_t d = INT64_MAX, this_rd = INT64_MAX;
// On key frames the mode cost is conditioned on the above/left block modes.
1148 if (cpi->common.frame_type == KEY_FRAME) {
1149 const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1150 const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1152 bmode_costs = mb->y_mode_costs[A][L];
1155 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1156 t_above + idx, t_left + idy, &r, &ry, &d,
1157 bsize, best_rd - total_rd);
// Abort when this partition alone exhausts the remaining RD budget.
1158 if (this_rd >= best_rd - total_rd)
1161 total_rd += this_rd;
1163 total_distortion += d;
1166 mic->bmi[i].as_mode = best_mode;
// Replicate the chosen mode across the 4x4 units covered by a 4x8/8x4
// partition so every bmi entry carries a valid mode.
1167 for (j = 1; j < num_4x4_blocks_high; ++j)
1168 mic->bmi[i + j * 2].as_mode = best_mode;
1169 for (j = 1; j < num_4x4_blocks_wide; ++j)
1170 mic->bmi[i + j].as_mode = best_mode;
1172 if (total_rd >= best_rd)
1178 *rate_y = tot_rate_y;
1179 *distortion = total_distortion;
// The block-level mode is taken from the last (bottom-right) sub-block.
1180 mic->mbmi.mode = mic->bmi[3].as_mode;
1182 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
// Searches all luma intra prediction modes (DC_PRED..TM_PRED) for a whole
// superblock, tracking the best mode/tx size by RD cost and maintaining a
// per-tx-mode RD cache when full-RD tx-size search is enabled.
// NOTE(review): interior lines are elided in this view.
1185 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1186 int *rate, int *rate_tokenonly,
1187 int64_t *distortion, int *skippable,
1189 int64_t tx_cache[TX_MODES],
1191 MB_PREDICTION_MODE mode;
1192 MB_PREDICTION_MODE mode_selected = DC_PRED;
1193 MACROBLOCKD *const xd = &x->e_mbd;
1194 MODE_INFO *const mic = xd->mi_8x8[0];
1195 int this_rate, this_rate_tokenonly, s;
1196 int64_t this_distortion, this_rd;
1197 TX_SIZE best_tx = TX_4X4;
1199 int *bmode_costs = x->mbmode_cost;
// Invalidate the tx-mode RD cache before the search when it will be used.
1201 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1202 for (i = 0; i < TX_MODES; i++)
1203 tx_cache[i] = INT64_MAX;
1205 /* Y Search for intra prediction mode */
1206 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1207 int64_t local_tx_cache[TX_MODES];
1208 MODE_INFO *above_mi = xd->mi_8x8[-xd->mode_info_stride];
1209 MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL;
// Skip modes masked out by the speed-feature bitmask for this tx size.
1211 if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
1214 if (cpi->common.frame_type == KEY_FRAME) {
1215 const MB_PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1216 const MB_PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1218 bmode_costs = x->y_mode_costs[A][L];
1220 mic->mbmi.mode = mode;
1222 super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1223 bsize, local_tx_cache, best_rd);
// INT_MAX token rate signals the mode was pruned/aborted inside yrd.
1225 if (this_rate_tokenonly == INT_MAX)
1228 this_rate = this_rate_tokenonly + bmode_costs[mode];
1229 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1231 if (this_rd < best_rd) {
1232 mode_selected = mode;
1234 best_tx = mic->mbmi.tx_size;
1236 *rate_tokenonly = this_rate_tokenonly;
1237 *distortion = this_distortion;
// Fold this mode's per-tx-mode costs into the global cache, adjusted
// relative to the currently configured tx mode.
1241 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1242 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1243 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1244 local_tx_cache[cpi->common.tx_mode];
1245 if (adj_rd < tx_cache[i]) {
1246 tx_cache[i] = adj_rd;
// Commit the winning mode and transform size to the mode info.
1252 mic->mbmi.mode = mode_selected;
1253 mic->mbmi.tx_size = best_tx;
// Computes rate/distortion/skip/sse for the chroma (U and V) planes of a
// superblock at the current uv transform size, aborting when a plane's rate
// comes back INT_MAX. NOTE(review): interior lines are elided in this view;
// the INT64_MAX distortion at the end presumably marks the abort path --
// confirm against the full source.
1258 static void super_block_uvrd(VP9_COMP *const cpi, MACROBLOCK *x,
1259 int *rate, int64_t *distortion, int *skippable,
1260 int64_t *sse, BLOCK_SIZE bsize,
1261 int64_t ref_best_rd) {
1262 MACROBLOCKD *const xd = &x->e_mbd;
1263 MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
1264 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1266 int pnrate = 0, pnskip = 1;
1267 int64_t pndist = 0, pnsse = 0;
1269 if (ref_best_rd < 0)
// Inter blocks need the chroma residual computed before the transform RD.
1272 if (is_inter_block(mbmi))
1273 vp9_subtract_sbuv(x, bsize);
// Accumulate per-plane results over U and V (planes 1 and 2).
1280 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1281 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1282 ref_best_rd, plane, bsize, uv_txfm_size);
1283 if (pnrate == INT_MAX)
1286 *distortion += pndist;
// Block is skippable only if every chroma plane is skippable.
1288 *skippable &= pnskip;
1294 *distortion = INT64_MAX;
// Searches all chroma intra modes for a superblock and returns the best RD
// cost; on a new best it may also swap the coefficient buffers into the
// pick-mode context's slot 2 when tx size selection is fixed.
// NOTE(review): interior lines are elided in this view.
1300 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1301 PICK_MODE_CONTEXT *ctx,
1302 int *rate, int *rate_tokenonly,
1303 int64_t *distortion, int *skippable,
1304 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
1305 MB_PREDICTION_MODE mode;
1306 MB_PREDICTION_MODE mode_selected = DC_PRED;
1307 int64_t best_rd = INT64_MAX, this_rd;
1308 int this_rate_tokenonly, this_rate, s;
1309 int64_t this_distortion, this_sse;
1311 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
// Skip modes masked out by the chroma speed-feature bitmask.
1312 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
1315 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode;
1317 super_block_uvrd(cpi, x, &this_rate_tokenonly,
1318 &this_distortion, &s, &this_sse, bsize, best_rd);
1319 if (this_rate_tokenonly == INT_MAX)
1321 this_rate = this_rate_tokenonly +
1322 x->intra_uv_mode_cost[cpi->common.frame_type][mode];
1323 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1325 if (this_rd < best_rd) {
1326 mode_selected = mode;
1329 *rate_tokenonly = this_rate_tokenonly;
1330 *distortion = this_distortion;
1332 if (!x->select_txfm_size) {
1334 struct macroblock_plane *const p = x->plane;
1335 struct macroblockd_plane *const pd = x->e_mbd.plane;
// Rotate the chroma coefficient buffers: stash the current best in the
// context's slot 2 and point the working buffers at the old slot-2 storage.
1336 for (i = 1; i < MAX_MB_PLANE; ++i) {
1337 p[i].coeff = ctx->coeff_pbuf[i][2];
1338 p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1339 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1340 p[i].eobs = ctx->eobs_pbuf[i][2];
1342 ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
1343 ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
1344 ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
1345 ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
1347 ctx->coeff_pbuf[i][0] = p[i].coeff;
1348 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
1349 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
1350 ctx->eobs_pbuf[i][0] = p[i].eobs;
// Leave the winning chroma mode in the mode info for the caller.
1356 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = mode_selected;
// Cheap chroma RD estimate: evaluates only DC_PRED instead of searching all
// uv intra modes. Used when the use_uv_intra_rd_estimate speed feature is on.
// NOTE(review): interior lines are elided in this view.
1360 static int64_t rd_sbuv_dcpred(VP9_COMP *cpi, MACROBLOCK *x,
1361 int *rate, int *rate_tokenonly,
1362 int64_t *distortion, int *skippable,
1367 x->e_mbd.mi_8x8[0]->mbmi.uv_mode = DC_PRED;
1368 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1369 skippable, &this_sse, bsize, INT64_MAX);
// Total rate = token rate + the signaling cost of DC_PRED itself.
1370 *rate = *rate_tokenonly +
1371 x->intra_uv_mode_cost[cpi->common.frame_type][DC_PRED];
1372 this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
// Chooses the chroma intra mode for the block: either a fast DC_PRED-only
// estimate (speed feature) or a full RD search over all uv modes. Sub-8x8
// blocks are evaluated at 8x8 because chroma is subsampled.
1377 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1378 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1379 int *rate_uv, int *rate_uv_tokenonly,
1380 int64_t *dist_uv, int *skip_uv,
1381 MB_PREDICTION_MODE *mode_uv) {
1382 MACROBLOCK *const x = &cpi->mb;
1384 // Use an estimated rd for uv_intra based on DC_PRED if the
1385 // appropriate speed flag is set.
1386 if (cpi->sf.use_uv_intra_rd_estimate) {
1387 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1388 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1389 // Else do a proper rd search for each possible transform size that may
1390 // be considered in the main rd loop.
1392 rd_pick_intra_sbuv_mode(cpi, x, ctx,
1393 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1394 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
// Report whichever uv mode the chosen search left in the mode info.
1396 *mode_uv = x->e_mbd.mi_8x8[0]->mbmi.uv_mode;
// Returns the bit cost of signaling an inter mode given its mv-ref context;
// modes are free when the segment's SKIP feature is active.
1399 static int cost_mv_ref(VP9_COMP *cpi, MB_PREDICTION_MODE mode,
1401 MACROBLOCK *const x = &cpi->mb;
1402 MACROBLOCKD *const xd = &x->e_mbd;
1403 const int segment_id = xd->mi_8x8[0]->mbmi.segment_id;
1405 // Don't account for mode here if segment skip is enabled.
1406 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1407 assert(is_inter_mode(mode));
1408 return x->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
// Writes the given prediction mode and first motion vector directly into the
// current block's mode info.
1414 void vp9_set_mbmode_and_mvs(MACROBLOCKD *xd, MB_PREDICTION_MODE mode,
1416 xd->mi_8x8[0]->mbmi.mode = mode;
1417 xd->mi_8x8[0]->mbmi.mv[0].as_mv = *mv;
// Joint (compound) motion search over both reference frames.
// NOTE(review): only the signature is visible in this view -- the parameter
// list and body are elided; see the full source for behavior.
1420 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1423 int mi_row, int mi_col,
1424 int_mv single_newmv[MAX_REF_FRAMES],
// Assigns the given sub-8x8 mode and motion vector(s) to block i of the
// current mode info, computes the mode + mv signaling cost, and replicates
// the bmi entry across the 4x4 units the partition covers. Returns the cost.
// NOTE(review): interior lines are elided in this view.
1427 static int labels2mode(MACROBLOCK *x, int i,
1428 MB_PREDICTION_MODE this_mode,
1429 int_mv *this_mv, int_mv *this_second_mv,
1430 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1431 int_mv seg_mvs[MAX_REF_FRAMES],
1432 int_mv *best_ref_mv,
1433 int_mv *second_best_ref_mv,
1434 int *mvjcost, int *mvcost[2], VP9_COMP *cpi) {
1435 MACROBLOCKD *const xd = &x->e_mbd;
1436 MODE_INFO *const mic = xd->mi_8x8[0];
1437 MB_MODE_INFO *mbmi = &mic->mbmi;
1438 int cost = 0, thismvcost = 0;
1440 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1441 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1442 const int has_second_rf = has_second_ref(mbmi);
1444 /* We have to be careful retrieving previously-encoded motion vectors.
1445 Ones from this macroblock have to be pulled from the BLOCKD array
1446 as they have not yet made it to the bmi array in our MB_MODE_INFO. */
1447 MB_PREDICTION_MODE m;
1449 // the only time we should do costing for new motion vector or mode
1450 // is when we are on a new label (jbb May 08, 2007)
1451 switch (m = this_mode) {
// NEWMV: take the searched mv and pay its bit cost against the ref mv.
1453 this_mv->as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1454 thismvcost = vp9_mv_bit_cost(&this_mv->as_mv, &best_ref_mv->as_mv,
1455 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1456 if (has_second_rf) {
1457 this_second_mv->as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1458 thismvcost += vp9_mv_bit_cost(&this_second_mv->as_mv,
1459 &second_best_ref_mv->as_mv,
1460 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
// NEARESTMV / NEARMV / ZEROMV: mvs come from the candidate list (no mv cost).
1464 this_mv->as_int = frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int;
1466 this_second_mv->as_int =
1467 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int;
1470 this_mv->as_int = frame_mv[NEARMV][mbmi->ref_frame[0]].as_int;
1472 this_second_mv->as_int =
1473 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int;
1476 this_mv->as_int = 0;
1478 this_second_mv->as_int = 0;
1484 cost = cost_mv_ref(cpi, this_mode,
1485 mbmi->mode_context[mbmi->ref_frame[0]]);
1487 mic->bmi[i].as_mv[0].as_int = this_mv->as_int;
1489 mic->bmi[i].as_mv[1].as_int = this_second_mv->as_int;
1491 mic->bmi[i].as_mode = m;
// Copy the bmi entry to all 4x4 units covered by this partition.
1493 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1494 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1495 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1496 &mic->bmi[i], sizeof(mic->bmi[i]));
// Builds the inter prediction for sub-8x8 block i, transforms/quantizes the
// residual per 4x4 unit, and accumulates rate, distortion (>>2 scaled) and
// sse; returns the RD cost. NOTE(review): interior lines are elided in this
// view; early-termination handling between rd1/rd2 is not fully visible.
1502 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1507 int64_t *distortion, int64_t *sse,
1508 ENTROPY_CONTEXT *ta,
1509 ENTROPY_CONTEXT *tl,
1510 int mi_row, int mi_col) {
1512 MACROBLOCKD *xd = &x->e_mbd;
1513 struct macroblockd_plane *const pd = &xd->plane[0];
1514 struct macroblock_plane *const p = &x->plane[0];
1515 MODE_INFO *const mi = xd->mi_8x8[0];
1516 const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1517 const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1518 const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1521 const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1523 uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1525 int64_t thisdistortion = 0, thissse = 0;
1526 int thisrate = 0, ref;
1527 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1528 const int is_compound = has_second_ref(&mi->mbmi);
// Predict from one or both references into the destination buffer.
1529 for (ref = 0; ref < 1 + is_compound; ++ref) {
1530 const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1531 pd->pre[ref].stride)];
1532 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1533 dst, pd->dst.stride,
1534 &mi->bmi[i].as_mv[ref].as_mv,
1535 &xd->block_refs[ref]->sf, width, height, ref,
1536 xd->interp_kernel, MV_PRECISION_Q3,
1537 mi_col * MI_SIZE + 4 * (i % 2),
1538 mi_row * MI_SIZE + 4 * (i / 2));
1541 vp9_subtract_block(height, width,
1542 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1544 dst, pd->dst.stride);
// Per-4x4 transform, quantize and coefficient costing.
1547 for (idy = 0; idy < height / 4; ++idy) {
1548 for (idx = 0; idx < width / 4; ++idx) {
1549 int64_t ssz, rd, rd1, rd2;
1552 k += (idy * 2 + idx);
1553 coeff = BLOCK_OFFSET(p->coeff, k);
1554 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1556 vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1557 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1560 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1561 so->scan, so->neighbors);
// rd1 = cost of coding, rd2 = cost of skipping (distortion = sse).
1562 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1563 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1570 *distortion = thisdistortion >> 2;
1571 *labelyrate = thisrate;
1572 *sse = thissse >> 2;
1574 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1585 ENTROPY_CONTEXT ta[2];
1586 ENTROPY_CONTEXT tl[2];
1590 int_mv *ref_mv, *second_ref_mv;
1598 MB_PREDICTION_MODE modes[4];
1599 SEG_RDSTAT rdstat[4][INTER_MODES];
// Returns nonzero when the full-pel position of mv lies outside the
// macroblock's allowed search bounds (UMV border check).
1603 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1604 return (mv->row >> 3) < x->mv_row_min ||
1605 (mv->row >> 3) > x->mv_row_max ||
1606 (mv->col >> 3) < x->mv_col_min ||
1607 (mv->col >> 3) > x->mv_col_max;
// Advances the plane-0 source and prediction buffer pointers to sub-8x8
// block i, so per-block motion search operates on the right pixels.
// Pair with mi_buf_restore() to undo.
1610 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1611 MB_MODE_INFO *const mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
1612 struct macroblock_plane *const p = &x->plane[0];
1613 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1615 p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
// The base predictor buffer is expected to be 8-byte aligned.
1616 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1617 pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1618 pd->pre[0].stride)];
1619 if (has_second_ref(mbmi))
1620 pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1621 pd->pre[1].stride)];
// Restores the source and prediction buffer pointers saved before a
// mi_buf_shift() call.
1624 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1625 struct buf_2d orig_pre[2]) {
1626 MB_MODE_INFO *mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
1627 x->plane[0].src = orig_src;
1628 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1629 if (has_second_ref(mbmi))
1630 x->e_mbd.plane[0].pre[1] = orig_pre[1];
// Returns nonzero when either mv component has a sub-pel (eighth-pel)
// fraction, i.e. is not a whole-pixel vector.
1633 static INLINE int mv_has_subpel(const MV *mv) {
1634 return (mv->row & 0x0F) || (mv->col & 0x0F);
// Core sub-8x8 RD loop: for each 4x4/4x8/8x4 label of the block, searches
// NEARESTMV/NEARMV/ZEROMV/NEWMV (with per-label motion search for NEWMV),
// reuses results from previously evaluated filter indices when the mvs match
// at full pel, and accumulates the segment's rate/distortion into bsi.
// NOTE(review): interior lines are elided in this view; comments describe
// only the visible code.
1637 static void rd_check_segment_txsize(VP9_COMP *cpi, MACROBLOCK *x,
1638 const TileInfo *const tile,
1639 BEST_SEG_INFO *bsi_buf, int filter_idx,
1640 int_mv seg_mvs[4][MAX_REF_FRAMES],
1641 int mi_row, int mi_col) {
1642 int k, br = 0, idx, idy;
1643 int64_t bd = 0, block_sse = 0;
1644 MB_PREDICTION_MODE this_mode;
1645 MACROBLOCKD *xd = &x->e_mbd;
1646 VP9_COMMON *cm = &cpi->common;
1647 MODE_INFO *mi = xd->mi_8x8[0];
1648 MB_MODE_INFO *const mbmi = &mi->mbmi;
1649 struct macroblock_plane *const p = &x->plane[0];
1650 struct macroblockd_plane *const pd = &xd->plane[0];
1651 const int label_count = 4;
1652 int64_t this_segment_rd = 0;
1653 int label_mv_thresh;
1654 int segmentyrate = 0;
1655 const BLOCK_SIZE bsize = mbmi->sb_type;
1656 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1657 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1658 vp9_variance_fn_ptr_t *v_fn_ptr = &cpi->fn_ptr[bsize];
1659 ENTROPY_CONTEXT t_above[2], t_left[2];
1660 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1662 int subpelmv = 1, have_ref = 0;
1663 const int has_second_rf = has_second_ref(mbmi);
1665 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1666 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
1668 // 64 makes this threshold really big effectively
1669 // making it so that we very rarely check mvs on
1670 // segments. setting this to 1 would make mv thresh
1671 // roughly equal to what it is for macroblocks
1672 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1674 // Segmentation method overheads
1675 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1676 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1677 // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1678 // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1679 int_mv mode_mv[MB_MODE_COUNT], second_mode_mv[MB_MODE_COUNT];
1680 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1681 MB_PREDICTION_MODE mode_selected = ZEROMV;
1682 int64_t best_rd = INT64_MAX;
1683 const int i = idy * 2 + idx;
// Gather the NEAREST/NEAR mv candidates for each reference of this label.
1686 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1687 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1688 frame_mv[ZEROMV][frame].as_int = 0;
1689 vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1690 &frame_mv[NEARESTMV][frame],
1691 &frame_mv[NEARMV][frame]);
1694 // search for the best motion vector on this segment
1695 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1696 const struct buf_2d orig_src = x->plane[0].src;
1697 struct buf_2d orig_pre[2];
1699 mode_idx = INTER_OFFSET(this_mode);
1700 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1702 // if we're near/nearest and mv == 0,0, compare to zeromv
1703 if ((this_mode == NEARMV || this_mode == NEARESTMV ||
1704 this_mode == ZEROMV) &&
1705 frame_mv[this_mode][mbmi->ref_frame[0]].as_int == 0 &&
1707 frame_mv[this_mode][mbmi->ref_frame[1]].as_int == 0)) {
1708 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
1709 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1710 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1711 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
// Skip this mode when a cheaper mode encodes the same zero vector.
1713 if (this_mode == NEARMV) {
1716 } else if (this_mode == NEARESTMV) {
1720 assert(this_mode == ZEROMV);
1721 if (!has_second_rf) {
1723 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
1725 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
1729 frame_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
1730 frame_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
1732 frame_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
1733 frame_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
// Save buffers and entropy contexts before trying this mode.
1739 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1740 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1741 sizeof(bsi->rdstat[i][mode_idx].ta));
1742 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1743 sizeof(bsi->rdstat[i][mode_idx].tl));
1745 // motion search for newmv (single predictor case only)
1746 if (!has_second_rf && this_mode == NEWMV &&
1747 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1748 int_mv *const new_mv = &mode_mv[NEWMV];
1751 int thissme, bestsme = INT_MAX;
1752 int sadpb = x->sadperbit4;
1756 /* Is the best so far sufficiently good that we cant justify doing
1757 * and new motion search. */
1758 if (best_rd < label_mv_thresh)
1761 if (cpi->oxcf.mode != MODE_SECONDPASS_BEST &&
1762 cpi->oxcf.mode != MODE_BESTQUALITY) {
1763 // use previous block's result as next block's MV predictor.
1765 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1767 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1771 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1773 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1775 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
1776 // Take wtd average of the step_params based on the last frame's
1777 // max mv magnitude and the best ref mvs of the current block for
1778 // the given reference.
1779 step_param = (vp9_init_search_range(cpi, max_mv) +
1780 cpi->mv_step_param) >> 1;
1782 step_param = cpi->mv_step_param;
1785 mvp_full.row = bsi->mvp.as_mv.row >> 3;
1786 mvp_full.col = bsi->mvp.as_mv.col >> 3;
1788 if (cpi->sf.adaptive_motion_search && cm->show_frame) {
1789 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1790 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1791 step_param = MAX(step_param, 8);
1794 further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param;
1795 // adjust src pointer for this block
1798 vp9_set_mv_search_range(x, &bsi->ref_mv->as_mv);
// Dispatch to the configured integer-pel search strategy.
1800 if (cpi->sf.search_method == HEX) {
1801 bestsme = vp9_hex_search(x, &mvp_full,
1803 sadpb, 1, v_fn_ptr, 1,
1804 &bsi->ref_mv->as_mv,
1806 } else if (cpi->sf.search_method == SQUARE) {
1807 bestsme = vp9_square_search(x, &mvp_full,
1809 sadpb, 1, v_fn_ptr, 1,
1810 &bsi->ref_mv->as_mv,
1812 } else if (cpi->sf.search_method == BIGDIA) {
1813 bestsme = vp9_bigdia_search(x, &mvp_full,
1815 sadpb, 1, v_fn_ptr, 1,
1816 &bsi->ref_mv->as_mv,
1819 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
1820 sadpb, further_steps, 0, v_fn_ptr,
1821 &bsi->ref_mv->as_mv,
1825 // Should we do a full search (best quality only)
1826 if (cpi->oxcf.mode == MODE_BESTQUALITY ||
1827 cpi->oxcf.mode == MODE_SECONDPASS_BEST) {
1828 /* Check if mvp_full is within the range. */
1829 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1830 x->mv_row_min, x->mv_row_max);
1832 thissme = cpi->full_search_sad(x, &mvp_full,
1833 sadpb, 16, v_fn_ptr,
1834 x->nmvjointcost, x->mvcost,
1835 &bsi->ref_mv->as_mv, i);
1837 if (thissme < bestsme) {
1839 new_mv->as_int = mi->bmi[i].as_mv[0].as_int;
1841 /* The full search result is actually worse so re-instate the
1842 * previous best vector */
1843 mi->bmi[i].as_mv[0].as_int = new_mv->as_int;
// Refine to sub-pel when the integer search produced a usable result.
1847 if (bestsme < INT_MAX) {
1849 cpi->find_fractional_mv_step(x,
1851 &bsi->ref_mv->as_mv,
1852 cm->allow_high_precision_mv,
1853 x->errorperbit, v_fn_ptr,
1854 cpi->sf.subpel_force_stop,
1855 cpi->sf.subpel_iters_per_step,
1856 x->nmvjointcost, x->mvcost,
1858 &x->pred_sse[mbmi->ref_frame[0]]);
1860 // save motion search result for use in compound prediction
1861 seg_mvs[i][mbmi->ref_frame[0]].as_int = new_mv->as_int;
1864 if (cpi->sf.adaptive_motion_search)
1865 x->pred_mv[mbmi->ref_frame[0]].as_int = new_mv->as_int;
1867 // restore src pointers
1868 mi_buf_restore(x, orig_src, orig_pre);
// Compound prediction requires valid mvs for both references.
1871 if (has_second_rf) {
1872 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1873 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1877 if (has_second_rf && this_mode == NEWMV &&
1878 mbmi->interp_filter == EIGHTTAP) {
1879 // adjust src pointers
1881 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1883 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1884 mi_row, mi_col, seg_mvs[i],
1886 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1887 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1888 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1889 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1891 // restore src pointers
1892 mi_buf_restore(x, orig_src, orig_pre);
1895 bsi->rdstat[i][mode_idx].brate =
1896 labels2mode(x, i, this_mode, &mode_mv[this_mode],
1897 &second_mode_mv[this_mode], frame_mv, seg_mvs[i],
1898 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
// Record the chosen mv(s) in rdstat for this and any covered labels.
1902 bsi->rdstat[i][mode_idx].mvs[0].as_int = mode_mv[this_mode].as_int;
1903 if (num_4x4_blocks_wide > 1)
1904 bsi->rdstat[i + 1][mode_idx].mvs[0].as_int =
1905 mode_mv[this_mode].as_int;
1906 if (num_4x4_blocks_high > 1)
1907 bsi->rdstat[i + 2][mode_idx].mvs[0].as_int =
1908 mode_mv[this_mode].as_int;
1909 if (has_second_rf) {
1910 bsi->rdstat[i][mode_idx].mvs[1].as_int =
1911 second_mode_mv[this_mode].as_int;
1912 if (num_4x4_blocks_wide > 1)
1913 bsi->rdstat[i + 1][mode_idx].mvs[1].as_int =
1914 second_mode_mv[this_mode].as_int;
1915 if (num_4x4_blocks_high > 1)
1916 bsi->rdstat[i + 2][mode_idx].mvs[1].as_int =
1917 second_mode_mv[this_mode].as_int;
1920 // Trap vectors that reach beyond the UMV borders
1921 if (mv_check_bounds(x, &mode_mv[this_mode].as_mv) ||
1923 mv_check_bounds(x, &second_mode_mv[this_mode].as_mv)))
// For non-first filters, reuse RD stats from an earlier filter's pass
// when the mv is whole-pel and identical.
1926 if (filter_idx > 0) {
1927 BEST_SEG_INFO *ref_bsi = bsi_buf;
1928 subpelmv = mv_has_subpel(&mode_mv[this_mode].as_mv);
1929 have_ref = mode_mv[this_mode].as_int ==
1930 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1931 if (has_second_rf) {
1932 subpelmv |= mv_has_subpel(&second_mode_mv[this_mode].as_mv);
1933 have_ref &= second_mode_mv[this_mode].as_int ==
1934 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1937 if (filter_idx > 1 && !subpelmv && !have_ref) {
1938 ref_bsi = bsi_buf + 1;
1939 have_ref = mode_mv[this_mode].as_int ==
1940 ref_bsi->rdstat[i][mode_idx].mvs[0].as_int;
1941 if (has_second_rf) {
1942 have_ref &= second_mode_mv[this_mode].as_int ==
1943 ref_bsi->rdstat[i][mode_idx].mvs[1].as_int;
1947 if (!subpelmv && have_ref &&
1948 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1949 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1950 sizeof(SEG_RDSTAT));
1951 if (num_4x4_blocks_wide > 1)
1952 bsi->rdstat[i + 1][mode_idx].eobs =
1953 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1954 if (num_4x4_blocks_high > 1)
1955 bsi->rdstat[i + 2][mode_idx].eobs =
1956 ref_bsi->rdstat[i + 2][mode_idx].eobs;
1958 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1959 mode_selected = this_mode;
1960 best_rd = bsi->rdstat[i][mode_idx].brdcost;
// Otherwise do the full encode for this label/mode.
1966 bsi->rdstat[i][mode_idx].brdcost =
1967 encode_inter_mb_segment(cpi, x,
1968 bsi->segment_rd - this_segment_rd, i,
1969 &bsi->rdstat[i][mode_idx].byrate,
1970 &bsi->rdstat[i][mode_idx].bdist,
1971 &bsi->rdstat[i][mode_idx].bsse,
1972 bsi->rdstat[i][mode_idx].ta,
1973 bsi->rdstat[i][mode_idx].tl,
1975 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1976 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1977 bsi->rdstat[i][mode_idx].brate, 0);
1978 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
1979 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
1980 if (num_4x4_blocks_wide > 1)
1981 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
1982 if (num_4x4_blocks_high > 1)
1983 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
1986 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1987 mode_selected = this_mode;
1988 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1990 } /*for each 4x4 mode*/
// No viable mode: invalidate remaining labels and abort the segment.
1992 if (best_rd == INT64_MAX) {
1994 for (iy = i + 1; iy < 4; ++iy)
1995 for (midx = 0; midx < INTER_MODES; ++midx)
1996 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
1997 bsi->segment_rd = INT64_MAX;
2001 mode_idx = INTER_OFFSET(mode_selected);
2002 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2003 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
// Re-apply the winning mode so mode info reflects the final decision.
2005 labels2mode(x, i, mode_selected, &mode_mv[mode_selected],
2006 &second_mode_mv[mode_selected], frame_mv, seg_mvs[i],
2007 bsi->ref_mv, bsi->second_ref_mv, x->nmvjointcost,
2010 br += bsi->rdstat[i][mode_idx].brate;
2011 bd += bsi->rdstat[i][mode_idx].bdist;
2012 block_sse += bsi->rdstat[i][mode_idx].bsse;
2013 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2014 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2016 if (this_segment_rd > bsi->segment_rd) {
2018 for (iy = i + 1; iy < 4; ++iy)
2019 for (midx = 0; midx < INTER_MODES; ++midx)
2020 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2021 bsi->segment_rd = INT64_MAX;
2025 } /* for each label */
2029 bsi->segment_yrate = segmentyrate;
2030 bsi->segment_rd = this_segment_rd;
2031 bsi->sse = block_sse;
2033 // update the coding decisions
2034 for (k = 0; k < 4; ++k)
2035 bsi->modes[k] = mi->bmi[k].as_mode;
// Entry point for sub-8x8 partition RD search: initializes the BEST_SEG_INFO
// for the given filter index, runs rd_check_segment_txsize(), and on success
// writes the chosen per-label modes/mvs/eobs back into the mode info.
// Returns the segment RD cost. NOTE(review): interior lines are elided in
// this view.
2038 static int64_t rd_pick_best_mbsegmentation(VP9_COMP *cpi, MACROBLOCK *x,
2039 const TileInfo *const tile,
2040 int_mv *best_ref_mv,
2041 int_mv *second_best_ref_mv,
2045 int64_t *returndistortion,
2046 int *skippable, int64_t *psse,
2048 int_mv seg_mvs[4][MAX_REF_FRAMES],
2049 BEST_SEG_INFO *bsi_buf,
2051 int mi_row, int mi_col) {
2053 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
2054 MACROBLOCKD *xd = &x->e_mbd;
2055 MODE_INFO *mi = xd->mi_8x8[0];
2056 MB_MODE_INFO *mbmi = &mi->mbmi;
// Seed the search state: budget, reference mvs, mv predictor, threshold.
2061 bsi->segment_rd = best_rd;
2062 bsi->ref_mv = best_ref_mv;
2063 bsi->second_ref_mv = second_best_ref_mv;
2064 bsi->mvp.as_int = best_ref_mv->as_int;
2065 bsi->mvthresh = mvthresh;
2067 for (i = 0; i < 4; i++)
2068 bsi->modes[i] = ZEROMV;
2070 rd_check_segment_txsize(cpi, x, tile, bsi_buf, filter_idx, seg_mvs,
// Bail out when the search could not beat the incoming best RD.
2073 if (bsi->segment_rd > best_rd)
2075 /* set it to the best */
2076 for (i = 0; i < 4; i++) {
2077 mode_idx = INTER_OFFSET(bsi->modes[i]);
2078 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2079 if (has_second_ref(mbmi))
2080 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2081 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2082 mi->bmi[i].as_mode = bsi->modes[i];
2086 * used to set mbmi->mv.as_int
2088 *returntotrate = bsi->r;
2089 *returndistortion = bsi->d;
2090 *returnyrate = bsi->segment_yrate;
2091 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
// Block-level mode is taken from the last (bottom-right) label.
2093 mbmi->mode = bsi->modes[3];
2095 return bsi->segment_rd;
// Scores the candidate reference mvs for a reference frame by SAD against
// the source block, recording per-candidate SADs, the best candidate index,
// the max mv magnitude seen, and the best SAD for later search-range tuning.
// NOTE(review): interior lines are elided in this view.
2098 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2099 uint8_t *ref_y_buffer, int ref_y_stride,
2100 int ref_frame, BLOCK_SIZE block_size ) {
2101 MACROBLOCKD *xd = &x->e_mbd;
2102 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2107 int best_sad = INT_MAX;
2108 int this_sad = INT_MAX;
2111 uint8_t *src_y_ptr = x->plane[0].src.buf;
2113 int row_offset, col_offset;
// Optionally consider one extra candidate (the adaptive pred mv) when the
// relevant speed features are enabled.
2114 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2115 (cpi->sf.adaptive_motion_search &&
2116 cpi->common.show_frame &&
2117 block_size < cpi->sf.max_partition_size);
2120 pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
2121 pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
2122 pred_mv[2] = x->pred_mv[ref_frame];
2124 // Get the sad for each candidate reference mv
2125 for (i = 0; i < num_mv_refs; i++) {
2126 this_mv.as_int = pred_mv[i].as_int;
2128 max_mv = MAX(max_mv,
2129 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2130 // only need to check zero mv once
2131 if (!this_mv.as_int && zero_seen) {
2132 x->mode_sad[ref_frame][i] = x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)];
2135 zero_seen = zero_seen || !this_mv.as_int;
// Convert the candidate to full-pel and locate it in the reference frame.
2137 row_offset = this_mv.as_mv.row >> 3;
2138 col_offset = this_mv.as_mv.col >> 3;
2139 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2141 // Find sad for current vector.
2142 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2143 ref_y_ptr, ref_y_stride,
2145 x->mode_sad[ref_frame][i] = this_sad;
2146 if (this_mv.as_int == 0)
2147 x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] = this_sad;
2149 // Note if it is the best so far.
2150 if (this_sad < best_sad) {
2151 best_sad = this_sad;
// Fallback SAD for ZEROMV when no zero candidate was encountered.
2157 x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)] =
2158 cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2159 ref_y_buffer, ref_y_stride,
2162 // Note the index of the mv that worked best in the reference list.
2163 x->mv_best_ref_index[ref_frame] = best_index;
2164 x->max_mv_context[ref_frame] = max_mv;
2165 x->pred_mv_sad[ref_frame] = best_sad;
// Estimate the bit cost of signalling each reference-frame choice for a
// block in the given segment.  Fills ref_costs_single[] and ref_costs_comp[]
// (indexed by MV_REFERENCE_FRAME) with costs in vp9_cost_bit units, and
// writes the compound/single selection probability to *comp_mode_p.
// NOTE(review): several original lines are elided in this excerpt (e.g. the
// else-branches pairing the visible ifs); comments cover only visible code.
2168 static void estimate_ref_frame_costs(VP9_COMP *cpi, int segment_id,
2169 unsigned int *ref_costs_single,
2170 unsigned int *ref_costs_comp,
2171 vp9_prob *comp_mode_p) {
2172 VP9_COMMON *const cm = &cpi->common;
2173 MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2174 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
// When the segment pins the reference frame, no reference bits are coded,
// so every reference choice costs zero.
2176 if (seg_ref_active) {
2177 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2178 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2181 vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
// 128 == p(0.5): neutral default when compound selection is not signalled.
2182 vp9_prob comp_inter_p = 128;
2184 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2185 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2186 *comp_mode_p = comp_inter_p;
// Intra costs only the intra/inter flag (coded as 0 == intra).
2191 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
// Single-reference costs: intra/inter flag, optional compound flag, then
// the two-level single-ref tree (p1 splits LAST vs GOLDEN/ALTREF,
// p2 splits GOLDEN vs ALTREF).
2193 if (cm->reference_mode != COMPOUND_REFERENCE) {
2194 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2195 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2196 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2198 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2199 base_cost += vp9_cost_bit(comp_inter_p, 0);
2201 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2202 ref_costs_single[ALTREF_FRAME] = base_cost;
2203 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2204 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2205 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2206 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2207 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
// Fallback (single reference not available): flat 512 == 2 bits each.
2209 ref_costs_single[LAST_FRAME] = 512;
2210 ref_costs_single[GOLDEN_FRAME] = 512;
2211 ref_costs_single[ALTREF_FRAME] = 512;
// Compound-reference costs: the comp-ref bit selects which forward frame
// (LAST vs GOLDEN) pairs with the fixed alt-ref.
2213 if (cm->reference_mode != SINGLE_REFERENCE) {
2214 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2215 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2217 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2218 base_cost += vp9_cost_bit(comp_inter_p, 1);
2220 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2221 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
// Fallback when compound prediction is unavailable.
2223 ref_costs_comp[LAST_FRAME] = 512;
2224 ref_costs_comp[GOLDEN_FRAME] = 512;
// Snapshot the current coding decisions for this block into the
// PICK_MODE_CONTEXT so they can be restored later if this mode is finally
// chosen: skip flag, mode index, mode info, best reference MVs, and the
// per-category RD differences (prediction mode, tx size, interp filter).
// NOTE(review): some parameter lines (mode_index, ref_mv) are elided in
// this excerpt.
2229 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2232 int_mv *second_ref_mv,
2233 int64_t comp_pred_diff[REFERENCE_MODES],
2234 int64_t tx_size_diff[TX_MODES],
2235 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2236 MACROBLOCKD *const xd = &x->e_mbd;
2238 // Take a snapshot of the coding context so it can be
2239 // restored if we decide to encode this way
2240 ctx->skip = x->skip;
2241 ctx->best_mode_index = mode_index;
// Copies the whole MODE_INFO by value, not just a pointer.
2242 ctx->mic = *xd->mi_8x8[0];
2244 ctx->best_ref_mv[0].as_int = ref_mv->as_int;
2245 ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
// RD-cost differences per reference mode, narrowed to int for storage.
2247 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2248 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2249 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2251 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2252 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2253 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
// Point the per-plane dst[] buffers at the planes of the given YV12 source
// frame and initialize each plane's prediction pointer for the block at
// (mi_row, mi_col), applying the luma scale factors to plane 0 and the
// chroma scale factors to the remaining planes.
2256 static void setup_pred_block(const MACROBLOCKD *xd,
2257 struct buf_2d dst[MAX_MB_PLANE],
2258 const YV12_BUFFER_CONFIG *src,
2259 int mi_row, int mi_col,
2260 const struct scale_factors *scale,
2261 const struct scale_factors *scale_uv) {
2264 dst[0].buf = src->y_buffer;
2265 dst[0].stride = src->y_stride;
2266 dst[1].buf = src->u_buffer;
2267 dst[2].buf = src->v_buffer;
// U and V share a stride in YV12 layout.
2268 dst[1].stride = dst[2].stride = src->uv_stride;
// Plane 3 (alpha) is only populated in builds with an alpha channel.
2270 dst[3].buf = src->alpha_buffer;
2271 dst[3].stride = src->alpha_stride;
2274 // TODO(jkoleszar): Make scale factors per-plane data
2275 for (i = 0; i < MAX_MB_PLANE; i++) {
2276 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2277 i ? scale_uv : scale,
2278 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
// Prepare everything needed to evaluate inter modes against one reference
// frame: set up the prediction-plane buffers for this block, gather and
// refine the candidate reference MVs (nearest/near), and optionally run
// mv_pred to pick a search centre for later motion searches.
2282 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2283 const TileInfo *const tile,
2284 MV_REFERENCE_FRAME ref_frame,
2285 BLOCK_SIZE block_size,
2286 int mi_row, int mi_col,
2287 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2288 int_mv frame_near_mv[MAX_REF_FRAMES],
2289 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2290 const VP9_COMMON *cm = &cpi->common;
2291 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2292 MACROBLOCKD *const xd = &x->e_mbd;
2293 MODE_INFO *const mi = xd->mi_8x8[0];
2294 int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
// frame_refs[] is indexed from 0, ref_frame from LAST_FRAME (1).
2295 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2297 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2298 // use the UV scaling factors.
2299 setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2301 // Gets an initial list of candidate vectors from neighbours and orders them
2302 vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref_frame, candidates,
2305 // Candidate refinement carried out at encoder and decoder
2306 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2307 &frame_nearest_mv[ref_frame],
2308 &frame_near_mv[ref_frame]);
2310 // Further refinement that is encode side only to test the top few candidates
2311 // in full and choose the best as the centre point for subsequent searches.
2312 // The current implementation doesn't support scaling.
2313 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2314 mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2315 ref_frame, block_size);
// Return the pre-scaled version of the given reference frame, or NULL when
// the scaled index equals the original index (i.e. no scaling was needed
// and the reference can be used directly).
// NOTE(review): the second parameter line is elided in this excerpt.
2318 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
2320 const VP9_COMMON *const cm = &cpi->common;
2321 const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
2322 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
2323 return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
// Rate cost (scaled by SWITCHABLE_INTERP_RATE_FACTOR) of signalling the
// current block's interpolation filter, given the switchable-filter
// context derived from the neighbouring blocks.
2326 static INLINE int get_switchable_rate(const MACROBLOCK *x) {
2327 const MACROBLOCKD *const xd = &x->e_mbd;
2328 const MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi;
2329 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2330 return SWITCHABLE_INTERP_RATE_FACTOR *
2331 x->switchable_interp_costs[ctx][mbmi->interp_filter];
// Single-reference motion search for a NEWMV candidate: full-pel search
// (hex/square/bigdia/diamond per speed settings) followed by sub-pel
// refinement, writing the winning MV to *tmp_mv and its signalling cost to
// *rate_mv.  If the reference needed scaling, the pre-scaled frame is
// temporarily swapped into the prediction planes and restored on exit.
// NOTE(review): several lines (declarations, some closing braces, and an
// early-return path) are elided in this excerpt.
2334 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2335 const TileInfo *const tile,
2337 int mi_row, int mi_col,
2338 int_mv *tmp_mv, int *rate_mv) {
2339 MACROBLOCKD *xd = &x->e_mbd;
2340 VP9_COMMON *cm = &cpi->common;
2341 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2342 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2343 int bestsme = INT_MAX;
2344 int further_steps, step_param;
2345 int sadpb = x->sadperbit16;
2347 int ref = mbmi->ref_frame[0];
2348 int_mv ref_mv = mbmi->ref_mvs[ref][0];
// Save the MV search window; vp9_set_mv_search_range narrows it below and
// the original bounds are restored after the search.
2350 int tmp_col_min = x->mv_col_min;
2351 int tmp_col_max = x->mv_col_max;
2352 int tmp_row_min = x->mv_row_min;
2353 int tmp_row_max = x->mv_row_max;
2355 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
// Candidate MV predictors: the two best reference MVs plus the MV stored
// from a previous search of this reference.
2359 pred_mv[0] = mbmi->ref_mvs[ref][0];
2360 pred_mv[1] = mbmi->ref_mvs[ref][1];
2361 pred_mv[2] = x->pred_mv[ref];
2363 if (scaled_ref_frame) {
2365 // Swap out the reference frame for a version that's been scaled to
2366 // match the resolution of the current frame, allowing the existing
2367 // motion search code to be used without additional modifications.
2368 for (i = 0; i < MAX_MB_PLANE; i++)
2369 backup_yv12[i] = xd->plane[i].pre[0];
2371 setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2374 vp9_set_mv_search_range(x, &ref_mv.as_mv);
2376 // Work out the size of the first step in the mv step search.
2377 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2378 if (cpi->sf.auto_mv_step_size && cpi->common.show_frame) {
2379 // Take wtd average of the step_params based on the last frame's
2380 // max mv magnitude and that based on the best ref mvs of the current
2381 // block for the given reference.
2382 step_param = (vp9_init_search_range(cpi, x->max_mv_context[ref]) +
2383 cpi->mv_step_param) >> 1;
2385 step_param = cpi->mv_step_param;
// Smaller partitions get a larger minimum step (coarser start) since a
// bigger block at the same position has already been searched.
2388 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2389 cpi->common.show_frame) {
2390 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2391 b_width_log2(bsize)));
2392 step_param = MAX(step_param, boffset);
2395 if (cpi->sf.adaptive_motion_search) {
2396 int bwl = b_width_log2_lookup[bsize];
2397 int bhl = b_height_log2_lookup[bsize];
// Normalize the prediction SAD by block area to get an activity level.
2399 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
// If another reference predicts much better (8x lower SAD), give up on
// this one early: invalidate the MV and restore state.
2404 for (i = LAST_FRAME; i <= ALTREF_FRAME && cpi->common.show_frame; ++i) {
2405 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2406 x->pred_mv[ref].as_int = 0;
2407 tmp_mv->as_int = INVALID_MV;
2409 if (scaled_ref_frame) {
2411 for (i = 0; i < MAX_MB_PLANE; i++)
2412 xd->plane[i].pre[0] = backup_yv12[i];
// Start the search from the predictor that scored best in mv_pred.
2419 mvp_full = pred_mv[x->mv_best_ref_index[ref]].as_mv;
2424 // Further step/diamond searches as necessary
2425 further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
// Full-pel search, pattern chosen by the speed features.
2427 if (cpi->sf.search_method == HEX) {
2428 bestsme = vp9_hex_search(x, &mvp_full, step_param, sadpb, 1,
2429 &cpi->fn_ptr[bsize], 1,
2430 &ref_mv.as_mv, &tmp_mv->as_mv);
2431 } else if (cpi->sf.search_method == SQUARE) {
2432 bestsme = vp9_square_search(x, &mvp_full, step_param, sadpb, 1,
2433 &cpi->fn_ptr[bsize], 1,
2434 &ref_mv.as_mv, &tmp_mv->as_mv);
2435 } else if (cpi->sf.search_method == BIGDIA) {
2436 bestsme = vp9_bigdia_search(x, &mvp_full, step_param, sadpb, 1,
2437 &cpi->fn_ptr[bsize], 1,
2438 &ref_mv.as_mv, &tmp_mv->as_mv);
2440 bestsme = vp9_full_pixel_diamond(cpi, x, &mvp_full, step_param,
2441 sadpb, further_steps, 1,
2442 &cpi->fn_ptr[bsize],
2443 &ref_mv.as_mv, &tmp_mv->as_mv);
// Restore the original search window.
2446 x->mv_col_min = tmp_col_min;
2447 x->mv_col_max = tmp_col_max;
2448 x->mv_row_min = tmp_row_min;
2449 x->mv_row_max = tmp_row_max;
// Sub-pel refinement only if the full-pel search found something.
2451 if (bestsme < INT_MAX) {
2452 int dis; /* TODO: use dis in distortion calculation later. */
2453 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv.as_mv,
2454 cm->allow_high_precision_mv,
2456 &cpi->fn_ptr[bsize],
2457 cpi->sf.subpel_force_stop,
2458 cpi->sf.subpel_iters_per_step,
2459 x->nmvjointcost, x->mvcost,
2460 &dis, &x->pred_sse[ref]);
2462 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv.as_mv,
2463 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
// Remember the result as a predictor for later blocks on shown frames.
2465 if (cpi->sf.adaptive_motion_search && cpi->common.show_frame)
2466 x->pred_mv[ref].as_int = tmp_mv->as_int;
2468 if (scaled_ref_frame) {
2470 for (i = 0; i < MAX_MB_PLANE; i++)
2471 xd->plane[i].pre[0] = backup_yv12[i];
// Iterative joint motion search for compound prediction: alternately fixes
// one reference's MV, builds its prediction as `second_pred`, and refines
// the other reference's MV against it (small-range full-pel search plus
// sub-pel step).  Up to 4 iterations, keeping a candidate MV only when it
// improves on that reference's previous best error.  On exit the MV rates
// are accumulated into *rate_mv and the temporary prediction buffer freed.
// NOTE(review): several declaration/brace lines are elided in this excerpt.
2475 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2478 int mi_row, int mi_col,
2479 int_mv single_newmv[MAX_REF_FRAMES],
2481 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2482 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2483 MACROBLOCKD *xd = &x->e_mbd;
2484 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
// ref_frame[1] can be negative (NONE); clamp to 0 for safe indexing.
2485 const int refs[2] = { mbmi->ref_frame[0],
2486 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2489 // Prediction buffer from second frame.
2490 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2492 // Do joint motion search in compound mode to get more accurate mv.
2493 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2494 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2495 int last_besterr[2] = {INT_MAX, INT_MAX};
2496 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2497 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2498 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
// Seed each reference with its best ref MV and the single-prediction
// search result, swapping in scaled frames where necessary.
2501 for (ref = 0; ref < 2; ++ref) {
2502 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2504 if (scaled_ref_frame[ref]) {
2506 // Swap out the reference frame for a version that's been scaled to
2507 // match the resolution of the current frame, allowing the existing
2508 // motion search code to be used without additional modifications.
2509 for (i = 0; i < MAX_MB_PLANE; i++)
2510 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2511 setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL);
2514 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2517 // Allow joint search multiple times iteratively for each ref frame
2518 // and break out the search loop if it couldn't find better mv.
2519 for (ite = 0; ite < 4; ite++) {
2520 struct buf_2d ref_yv12[2];
2521 int bestsme = INT_MAX;
2522 int sadpb = x->sadperbit16;
2524 int search_range = 3;
// Save the MV window; restored after vp9_set_mv_search_range narrows it.
2526 int tmp_col_min = x->mv_col_min;
2527 int tmp_col_max = x->mv_col_max;
2528 int tmp_row_min = x->mv_row_min;
2529 int tmp_row_max = x->mv_row_max;
2532 // Initialized here because of compiler problem in Visual Studio.
2533 ref_yv12[0] = xd->plane[0].pre[0];
2534 ref_yv12[1] = xd->plane[0].pre[1];
2536 // Get pred block from second frame.
// `id` selects which reference is being refined; `!id` is the fixed one
// whose prediction becomes second_pred.
2537 vp9_build_inter_predictor(ref_yv12[!id].buf,
2538 ref_yv12[!id].stride,
2540 &frame_mv[refs[!id]].as_mv,
2541 &xd->block_refs[!id]->sf,
2543 xd->interp_kernel, MV_PRECISION_Q3,
2544 mi_col * MI_SIZE, mi_row * MI_SIZE);
2546 // Compound motion search on first ref frame.
2548 xd->plane[0].pre[0] = ref_yv12[id];
2549 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2551 // Use mv result from single mode as mvp.
2552 tmp_mv.as_int = frame_mv[refs[id]].as_int;
// Convert 1/8-pel MV units to full-pel for the integer search.
2554 tmp_mv.as_mv.col >>= 3;
2555 tmp_mv.as_mv.row >>= 3;
2557 // Small-range full-pixel motion search
2558 bestsme = vp9_refining_search_8p_c(x, &tmp_mv.as_mv, sadpb,
2560 &cpi->fn_ptr[bsize],
2561 x->nmvjointcost, x->mvcost,
2562 &ref_mv[id].as_mv, second_pred,
// Restore the original MV window.
2565 x->mv_col_min = tmp_col_min;
2566 x->mv_col_max = tmp_col_max;
2567 x->mv_row_min = tmp_row_min;
2568 x->mv_row_max = tmp_row_max;
2570 if (bestsme < INT_MAX) {
2571 int dis; /* TODO: use dis in distortion calculation later. */
2573 bestsme = cpi->find_fractional_mv_step_comp(
2576 cpi->common.allow_high_precision_mv,
2578 &cpi->fn_ptr[bsize],
2579 0, cpi->sf.subpel_iters_per_step,
2580 x->nmvjointcost, x->mvcost,
2581 &dis, &sse, second_pred,
// Restore the plane-0 predictor that was swapped for the search.
2586 xd->plane[0].pre[0] = scaled_first_yv12;
// Keep the refined MV only if it beats this reference's previous best.
2588 if (bestsme < last_besterr[id]) {
2589 frame_mv[refs[id]].as_int = tmp_mv.as_int;
2590 last_besterr[id] = bestsme;
// Restore predictors for scaled references and accumulate MV rate.
2598 for (ref = 0; ref < 2; ++ref) {
2599 if (scaled_ref_frame[ref]) {
2600 // restore the predictor
2602 for (i = 0; i < MAX_MB_PLANE; i++)
2603 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2606 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2607 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2608 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2611 vpx_free(second_pred);
// Restore each plane's destination buffer pointer and stride from the
// saved originals (undoes a temporary redirection to scratch memory).
2614 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2615 uint8_t *orig_dst[MAX_MB_PLANE],
2616 int orig_dst_stride[MAX_MB_PLANE]) {
2618 for (i = 0; i < MAX_MB_PLANE; i++) {
2619 xd->plane[i].dst.buf = orig_dst[i];
2620 xd->plane[i].dst.stride = orig_dst_stride[i];
// Evaluate one inter prediction mode for the current block and return its
// RD cost (0 means the caller must recompute it).  Steps visible here:
// motion search for NEWMV (joint search for compound), zero-MV
// redundancy pruning for NEAR/NEAREST/ZEROMV, interpolation-filter
// search via modelled RD, optional encode-breakout skip based on
// prediction SSE, and finally full Y/UV rate-distortion accounting.
// Outputs are written through the many pointer parameters (rate2,
// distortion, rate_y/uv, mode_excluded, best_filter, ...).
// NOTE(review): numerous lines (declarations, early returns, closing
// braces) are elided in this excerpt; comments cover visible code only.
2624 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2625 const TileInfo *const tile,
2627 int64_t txfm_cache[],
2628 int *rate2, int64_t *distortion,
2630 int *rate_y, int64_t *distortion_y,
2631 int *rate_uv, int64_t *distortion_uv,
2632 int *mode_excluded, int *disable_skip,
2633 INTERP_FILTER *best_filter,
2634 int_mv (*mode_mv)[MAX_REF_FRAMES],
2635 int mi_row, int mi_col,
2636 int_mv single_newmv[MAX_REF_FRAMES],
2638 const int64_t ref_best_rd) {
2639 VP9_COMMON *cm = &cpi->common;
2640 MACROBLOCKD *xd = &x->e_mbd;
2641 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
2642 const int is_comp_pred = has_second_ref(mbmi);
2643 const int num_refs = is_comp_pred ? 2 : 1;
2644 const int this_mode = mbmi->mode;
2645 int_mv *frame_mv = mode_mv[this_mode];
// Second ref may be NONE (negative); clamp to 0 for safe array indexing.
2647 int refs[2] = { mbmi->ref_frame[0],
2648 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2650 int64_t this_rd = 0;
// Scratch prediction buffer (one 64x64 plane per MAX_MB_PLANE) used for
// the "other" candidate during the filter search double-buffering.
2651 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2652 int pred_exists = 0;
2654 int64_t rd, best_rd = INT64_MAX;
2655 int best_needs_copy = 0;
2656 uint8_t *orig_dst[MAX_MB_PLANE];
2657 int orig_dst_stride[MAX_MB_PLANE];
// Bail out when a required motion vector was never found.
2661 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2662 frame_mv[refs[1]].as_int == INVALID_MV)
2666 if (this_mode == NEWMV) {
2669 // Initialize mv using single prediction mode result.
2670 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2671 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
// For large-enough blocks refine both MVs jointly, otherwise just price
// the single-search MVs.
2673 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2674 joint_motion_search(cpi, x, bsize, frame_mv,
2675 mi_row, mi_col, single_newmv, &rate_mv);
2677 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2678 &mbmi->ref_mvs[refs[0]][0].as_mv,
2679 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2680 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2681 &mbmi->ref_mvs[refs[1]][0].as_mv,
2682 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
// Single-reference NEWMV path.
2687 single_motion_search(cpi, x, tile, bsize, mi_row, mi_col,
2690 frame_mv[refs[0]].as_int =
2691 xd->mi_8x8[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2692 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2696 // if we're near/nearest and mv == 0,0, compare to zeromv
// When NEAR/NEAREST resolve to the zero vector, only the cheapest of the
// equivalent modes should survive; c1/c2/c3 are the candidate mode costs.
2697 if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
2698 frame_mv[refs[0]].as_int == 0 &&
2699 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP) &&
2700 (num_refs == 1 || frame_mv[refs[1]].as_int == 0)) {
2701 int rfc = mbmi->mode_context[mbmi->ref_frame[0]];
2702 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
2703 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
2704 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
2706 if (this_mode == NEARMV) {
2709 } else if (this_mode == NEARESTMV) {
2713 assert(this_mode == ZEROMV);
2714 if (num_refs == 1) {
2716 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0) ||
2718 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0))
2722 mode_mv[NEARESTMV][mbmi->ref_frame[0]].as_int == 0 &&
2723 mode_mv[NEARESTMV][mbmi->ref_frame[1]].as_int == 0) ||
2725 mode_mv[NEARMV][mbmi->ref_frame[0]].as_int == 0 &&
2726 mode_mv[NEARMV][mbmi->ref_frame[1]].as_int == 0))
// Clamp the chosen MVs into the frame and reject out-of-bounds ones.
2732 for (i = 0; i < num_refs; ++i) {
2733 cur_mv[i] = frame_mv[refs[i]];
2734 // Clip "next_nearest" so that it does not extend to far out of image
2735 if (this_mode != NEWMV)
2736 clamp_mv2(&cur_mv[i].as_mv, xd);
2738 if (mv_check_bounds(x, &cur_mv[i].as_mv))
2740 mbmi->mv[i].as_int = cur_mv[i].as_int;
2743 // do first prediction into the destination buffer. Do the next
2744 // prediction into a temporary buffer. Then keep track of which one
2745 // of these currently holds the best predictor, and use the other
2746 // one for future predictions. In the end, copy from tmp_buf to
2747 // dst if necessary.
2748 for (i = 0; i < MAX_MB_PLANE; i++) {
2749 orig_dst[i] = xd->plane[i].dst.buf;
2750 orig_dst_stride[i] = xd->plane[i].dst.stride;
2753 /* We don't include the cost of the second reference here, because there
2754 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2755 * words if you present them in that order, the second one is always known
2756 * if the first is known */
2757 *rate2 += cost_mv_ref(cpi, this_mode,
2758 mbmi->mode_context[mbmi->ref_frame[0]]);
2760 if (!(*mode_excluded))
2761 *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
2762 : cm->reference_mode == COMPOUND_REFERENCE;
2765 // Are all MVs integer pel for Y and UV
// Integer-pel MVs mean the prediction is filter-independent, so one
// modelled RD result can be reused for every filter (see tmp_*_sum).
2766 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2768 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2770 // Search for best switchable filter by checking the variance of
2771 // pred error irrespective of whether the filter will be used
2772 cpi->mask_filter_rd = 0;
2773 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2774 cpi->rd_filter_cache[i] = INT64_MAX;
2776 if (cm->interp_filter != BILINEAR) {
2777 *best_filter = EIGHTTAP;
// Low-variance sources skip the filter search entirely.
2778 if (x->source_variance <
2779 cpi->sf.disable_filter_search_var_thresh) {
2780 *best_filter = EIGHTTAP;
2783 int tmp_rate_sum = 0;
2784 int64_t tmp_dist_sum = 0;
2786 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2789 mbmi->interp_filter = i;
2790 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2791 rs = get_switchable_rate(x);
2792 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
// Reuse the filter-independent modelled RD for integer-pel MVs.
2794 if (i > 0 && intpel_mv) {
2795 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2796 cpi->rd_filter_cache[i] = rd;
2797 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2798 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2799 if (cm->interp_filter == SWITCHABLE)
2801 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2804 int64_t dist_sum = 0;
// Build the prediction either into dst or into the scratch buffer,
// depending on which currently holds the best candidate.
2805 if ((cm->interp_filter == SWITCHABLE &&
2806 (!i || best_needs_copy)) ||
2807 (cm->interp_filter != SWITCHABLE &&
2808 (cm->interp_filter == mbmi->interp_filter ||
2809 (i == 0 && intpel_mv)))) {
2810 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2812 for (j = 0; j < MAX_MB_PLANE; j++) {
2813 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2814 xd->plane[j].dst.stride = 64;
2817 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2818 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2820 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2821 cpi->rd_filter_cache[i] = rd;
2822 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
2823 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2824 if (cm->interp_filter == SWITCHABLE)
2826 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, rd);
2828 if (i == 0 && intpel_mv) {
2829 tmp_rate_sum = rate_sum;
2830 tmp_dist_sum = dist_sum;
// Early termination: modelled RD already far worse than the best so far.
2834 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2835 if (rd / 2 > ref_best_rd) {
2836 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2840 newbest = i == 0 || rd < best_rd;
2844 *best_filter = mbmi->interp_filter;
// Track which buffer (dst vs tmp_buf) holds the best prediction.
2845 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2846 best_needs_copy = !best_needs_copy;
2849 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2850 (cm->interp_filter != SWITCHABLE &&
2851 cm->interp_filter == mbmi->interp_filter)) {
2855 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2858 // Set the appropriate filter
2859 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2860 cm->interp_filter : *best_filter;
2861 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2862 rs = cm->interp_filter == SWITCHABLE ? get_switchable_rate(x) : 0;
2865 if (best_needs_copy) {
2866 // again temporarily set the buffers to local memory to prevent a memcpy
2867 for (i = 0; i < MAX_MB_PLANE; i++) {
2868 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2869 xd->plane[i].dst.stride = 64;
2873 // Handles the special case when a filter that is not in the
2874 // switchable list (ex. bilinear, 6-tap) is indicated at the frame level
2875 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
// Second RD-breakout check against the final prediction.
2878 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2881 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2882 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2883 // if current pred_error modeled rd is substantially more than the best
2884 // so far, do not bother doing full rd
2885 if (rd / 2 > ref_best_rd) {
2886 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2891 if (cm->interp_filter == SWITCHABLE)
2892 *rate2 += get_switchable_rate(x);
2894 if (!is_comp_pred) {
2895 if (cpi->active_map_enabled && x->active_ptr[0] == 0)
// Encode breakout: if prediction error is tiny relative to dequant-based
// thresholds for Y, U and V, skip residual coding entirely.
2897 else if (cpi->allow_encode_breakout && x->encode_breakout) {
2898 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2899 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2900 unsigned int var, sse;
2901 // Skipping threshold for ac.
2902 unsigned int thresh_ac;
2903 // Set a maximum for threshold to avoid big PSNR loss in low bitrate case.
2904 // Use extreme low threshold for static frames to limit skipping.
2905 const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2906 ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2907 // The encode_breakout input
2908 const unsigned int min_thresh = ((x->encode_breakout << 4) > max_thresh) ?
2909 max_thresh : (x->encode_breakout << 4);
2911 // Calculate threshold according to dequant value.
2912 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2913 thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2915 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2916 xd->plane[0].dst.buf,
2917 xd->plane[0].dst.stride, &sse);
2919 // Adjust threshold according to partition size.
2920 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2921 b_height_log2_lookup[bsize]);
2923 // Y skipping condition checking
2924 if (sse < thresh_ac || sse == 0) {
2925 // Skipping threshold for dc
2926 unsigned int thresh_dc;
2928 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2930 // dc skipping checking
// sse - var approximates the DC component of the prediction error.
2931 if ((sse - var) < thresh_dc || sse == var) {
2932 unsigned int sse_u, sse_v;
2933 unsigned int var_u, var_v;
2935 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2936 x->plane[1].src.stride,
2937 xd->plane[1].dst.buf,
2938 xd->plane[1].dst.stride, &sse_u);
2940 // U skipping condition checking
2941 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2942 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2943 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2944 x->plane[2].src.stride,
2945 xd->plane[2].dst.buf,
2946 xd->plane[2].dst.stride, &sse_v);
2948 // V skipping condition checking
2949 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2950 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2953 // The cost of skip bit needs to be added.
2954 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2956 // Scaling factor for SSE from spatial domain to frequency domain
2957 // is 16. Adjust distortion accordingly.
2958 *distortion_uv = (sse_u + sse_v) << 4;
2959 *distortion = (sse << 4) + *distortion_uv;
2962 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
// Full (non-breakout) path: exact Y then UV rate/distortion.
2971 int skippable_y, skippable_uv;
2972 int64_t sseuv = INT64_MAX;
2973 int64_t rdcosty = INT64_MAX;
2975 // Y cost and distortion
2976 super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2977 bsize, txfm_cache, ref_best_rd);
2979 if (*rate_y == INT_MAX) {
2981 *distortion = INT64_MAX;
2982 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2987 *distortion += *distortion_y;
// Bound the UV search by the budget remaining after Y.
2989 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2990 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2992 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2993 bsize, ref_best_rd - rdcosty);
2994 if (*rate_uv == INT_MAX) {
2996 *distortion = INT64_MAX;
2997 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3003 *distortion += *distortion_uv;
3004 *skippable = skippable_y && skippable_uv;
3007 restore_dst_buf(xd, orig_dst, orig_dst_stride);
3008 return this_rd; // if 0, this will be re-calculated by caller
// Swap the coefficient buffers (coeff, qcoeff, dqcoeff, eobs) between the
// macroblock planes and slot 0 of the PICK_MODE_CONTEXT, using slot 1 as
// the temporary: after this call the context holds what the planes had,
// and the planes hold what the context had.
3011 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
3013 struct macroblock_plane *const p = x->plane;
3014 struct macroblockd_plane *const pd = x->e_mbd.plane;
3017 for (i = 0; i < max_plane; ++i) {
// Step 1: planes take the spare buffers from slot 1.
3018 p[i].coeff = ctx->coeff_pbuf[i][1];
3019 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
3020 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
3021 p[i].eobs = ctx->eobs_pbuf[i][1];
// Step 2: slot 1 receives what slot 0 held.
3023 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
3024 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
3025 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
3026 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
// Step 3: slot 0 records the planes' previous buffers.
3028 ctx->coeff_pbuf[i][0] = p[i].coeff;
3029 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
3030 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
3031 ctx->eobs_pbuf[i][0] = p[i].eobs;
// Pick the best intra mode for a superblock: first the luma (whole-block
// mode for >= 8x8, per-4x4 sub-modes below that), then chroma at the
// matching transform size.  Writes the total rate and distortion to
// *returnrate / *returndist (INT_MAX rate if nothing beats best_rd) and
// snapshots the chosen mode info into ctx.
// NOTE(review): some lines (early returns, else/closing braces) are
// elided in this excerpt.
3035 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3036 int *returnrate, int64_t *returndist,
3038 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3039 VP9_COMMON *const cm = &cpi->common;
3040 MACROBLOCKD *const xd = &x->e_mbd;
3041 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3042 int y_skip = 0, uv_skip = 0;
3043 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
3044 TX_SIZE max_uv_tx_size;
3047 xd->mi_8x8[0]->mbmi.ref_frame[0] = INTRA_FRAME;
3049 if (bsize >= BLOCK_8X8) {
3050 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3051 &dist_y, &y_skip, bsize, tx_cache,
3052 best_rd) >= best_rd) {
// Nothing beat best_rd; signal failure to the caller.
3053 *returnrate = INT_MAX;
3056 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
3057 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3058 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
// Sub-8x8 path: per-4x4 luma modes; chroma coded at BLOCK_8X8.
3061 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
3062 &dist_y, best_rd) >= best_rd) {
3063 *returnrate = INT_MAX;
3066 max_uv_tx_size = get_uv_tx_size_impl(xd->mi_8x8[0]->mbmi.tx_size, bsize);
3067 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
3068 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
// If both planes can be skipped, drop the token rates and pay only the
// skip flag; otherwise pay the no-skip flag plus full rates.
3071 if (y_skip && uv_skip) {
3072 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
3073 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3074 *returndist = dist_y + dist_uv;
3075 vp9_zero(ctx->tx_rd_diff);
3078 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3079 *returndist = dist_y + dist_uv;
// Record per-tx-mode RD differences relative to the frame's tx mode.
3080 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3081 for (i = 0; i < TX_MODES; i++) {
3082 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3083 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3085 ctx->tx_rd_diff[i] = 0;
3089 ctx->mic = *xd->mi_8x8[0];
3092 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3093 const TileInfo *const tile,
3094 int mi_row, int mi_col,
3096 int64_t *returndistortion,
3098 PICK_MODE_CONTEXT *ctx,
3099 int64_t best_rd_so_far) {
3100 VP9_COMMON *cm = &cpi->common;
3101 MACROBLOCKD *xd = &x->e_mbd;
3102 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3103 const struct segmentation *seg = &cm->seg;
3104 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3105 MB_PREDICTION_MODE this_mode;
3106 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3107 unsigned char segment_id = mbmi->segment_id;
3109 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3110 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3111 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3112 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3114 int64_t best_rd = best_rd_so_far;
3115 int64_t best_tx_rd[TX_MODES];
3116 int64_t best_tx_diff[TX_MODES];
3117 int64_t best_pred_diff[REFERENCE_MODES];
3118 int64_t best_pred_rd[REFERENCE_MODES];
3119 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3120 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3121 MB_MODE_INFO best_mbmode = { 0 };
3122 int mode_index, best_mode_index = 0;
3123 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3124 vp9_prob comp_mode_p;
3125 int64_t best_intra_rd = INT64_MAX;
3126 int64_t best_inter_rd = INT64_MAX;
3127 MB_PREDICTION_MODE best_intra_mode = DC_PRED;
3128 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3129 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3130 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3131 int64_t dist_uv[TX_SIZES];
3132 int skip_uv[TX_SIZES];
3133 MB_PREDICTION_MODE mode_uv[TX_SIZES];
3134 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3135 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3136 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3137 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3140 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3142 // Everywhere the flag is set the error is much higher than its neighbors.
3143 ctx->modes_with_high_error = 0;
3145 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3148 for (i = 0; i < REFERENCE_MODES; ++i)
3149 best_pred_rd[i] = INT64_MAX;
3150 for (i = 0; i < TX_MODES; i++)
3151 best_tx_rd[i] = INT64_MAX;
3152 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3153 best_filter_rd[i] = INT64_MAX;
3154 for (i = 0; i < TX_SIZES; i++)
3155 rate_uv_intra[i] = INT_MAX;
3156 for (i = 0; i < MAX_REF_FRAMES; ++i)
3157 x->pred_sse[i] = INT_MAX;
3159 *returnrate = INT_MAX;
3161 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3162 x->pred_mv_sad[ref_frame] = INT_MAX;
3163 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3164 vp9_setup_buffer_inter(cpi, x, tile,
3165 ref_frame, block_size, mi_row, mi_col,
3166 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3168 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3169 frame_mv[ZEROMV][ref_frame].as_int = 0;
3172 cpi->ref_frame_mask = 0;
3173 for (ref_frame = LAST_FRAME;
3174 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3176 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3177 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3178 cpi->ref_frame_mask |= (1 << ref_frame);
3184 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3185 int mode_excluded = 0;
3186 int64_t this_rd = INT64_MAX;
3187 int disable_skip = 0;
3188 int compmode_cost = 0;
3189 int rate2 = 0, rate_y = 0, rate_uv = 0;
3190 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3192 int64_t tx_cache[TX_MODES];
3195 int64_t total_sse = INT_MAX;
3198 for (i = 0; i < TX_MODES; ++i)
3199 tx_cache[i] = INT64_MAX;
3202 this_mode = vp9_mode_order[mode_index].mode;
3203 ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3204 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3206 // Look at the reference frame of the best mode so far and set the
3207 // skip mask to look at a subset of the remaining modes.
3208 if (mode_index > cpi->sf.mode_skip_start) {
3209 if (mode_index == (cpi->sf.mode_skip_start + 1)) {
3210 switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3212 cpi->mode_skip_mask = 0;
3215 cpi->mode_skip_mask = LAST_FRAME_MODE_MASK;
3218 cpi->mode_skip_mask = GOLDEN_FRAME_MODE_MASK;
3221 cpi->mode_skip_mask = ALT_REF_MODE_MASK;
3224 case MAX_REF_FRAMES:
3225 assert(0 && "Invalid Reference frame");
3228 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3232 // Skip if the current reference frame has been masked off
3233 if (cpi->ref_frame_mask & (1 << ref_frame) && this_mode != NEWMV)
3236 // Test best rd so far against threshold for trying this mode.
3237 if ((best_rd < ((int64_t)cpi->rd_threshes[segment_id][bsize][mode_index] *
3238 cpi->rd_thresh_freq_fact[bsize][mode_index] >> 5)) ||
3239 cpi->rd_threshes[segment_id][bsize][mode_index] == INT_MAX)
3242 // Do not allow compound prediction if the segment level reference
3243 // frame feature is in use as in this case there can only be one reference.
3244 if ((second_ref_frame > INTRA_FRAME) &&
3245 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3248 mbmi->ref_frame[0] = ref_frame;
3249 mbmi->ref_frame[1] = second_ref_frame;
3251 if (!(ref_frame == INTRA_FRAME
3252 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3255 if (!(second_ref_frame == NONE
3256 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3260 comp_pred = second_ref_frame > INTRA_FRAME;
3262 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3263 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3265 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3266 if (ref_frame != best_inter_ref_frame &&
3267 second_ref_frame != best_inter_ref_frame)
3271 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3272 mbmi->uv_mode = DC_PRED;
3274 // Evaluate all sub-pel filters irrespective of whether we can use
3275 // them for this frame.
3276 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3277 : cm->interp_filter;
3278 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
3281 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3284 mode_excluded = mode_excluded ? mode_excluded
3285 : cm->reference_mode == SINGLE_REFERENCE;
3287 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME)
3288 mode_excluded = mode_excluded ?
3289 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3292 // Select prediction reference frames.
3293 for (i = 0; i < MAX_MB_PLANE; i++) {
3294 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3296 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3299 // If the segment reference frame feature is enabled....
3300 // then do nothing if the current ref frame is not allowed..
3301 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3302 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3305 // If the segment skip feature is enabled....
3306 // then do nothing if the current mode is not allowed..
3307 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3308 (this_mode != ZEROMV && ref_frame != INTRA_FRAME)) {
3310 // Disable this drop out case if the ref frame
3311 // segment level feature is enabled for this segment. This is to
3312 // prevent the possibility that we end up unable to pick any mode.
3313 } else if (!vp9_segfeature_active(seg, segment_id,
3314 SEG_LVL_REF_FRAME)) {
3315 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3316 // unless ARNR filtering is enabled in which case we want
3317 // an unfiltered alternative. We allow near/nearest as well
3318 // because they may result in zero-zero MVs but be cheaper.
3319 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3320 if ((this_mode != ZEROMV &&
3321 !(this_mode == NEARMV &&
3322 frame_mv[NEARMV][ALTREF_FRAME].as_int == 0) &&
3323 !(this_mode == NEARESTMV &&
3324 frame_mv[NEARESTMV][ALTREF_FRAME].as_int == 0)) ||
3325 ref_frame != ALTREF_FRAME) {
3330 // TODO(JBB): This is to make up for the fact that we don't have sad
3331 // functions that work when the block size reads outside the umv. We
3332 // should fix this either by making the motion search just work on
3333 // a representative block in the boundary ( first ) and then implement a
3334 // function that computes SADs when inside the border.
3335 if (((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) &&
3336 this_mode == NEWMV) {
3340 #ifdef MODE_TEST_HIT_STATS
3342 // Keep a record of the number of test hits at each size
3343 cpi->mode_test_hits[bsize]++;
3347 if (ref_frame == INTRA_FRAME) {
3349 // Disable intra modes other than DC_PRED for blocks with low variance
3350 // Threshold for intra skipping based on source variance
3351 // TODO(debargha): Specialize the threshold for super block sizes
3352 static const unsigned int skip_intra_var_thresh[BLOCK_SIZES] = {
3353 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
3355 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3356 this_mode != DC_PRED &&
3357 x->source_variance < skip_intra_var_thresh[mbmi->sb_type])
3359 // Only search the oblique modes if the best so far is
3360 // one of the neighboring directional modes
3361 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3362 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3363 if (vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3366 mbmi->mode = this_mode;
3367 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3368 if (conditional_skipintra(mbmi->mode, best_intra_mode))
3372 super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3373 bsize, tx_cache, best_rd);
3375 if (rate_y == INT_MAX)
3378 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3379 if (rate_uv_intra[uv_tx] == INT_MAX) {
3380 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3381 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3382 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3385 rate_uv = rate_uv_tokenonly[uv_tx];
3386 distortion_uv = dist_uv[uv_tx];
3387 skippable = skippable && skip_uv[uv_tx];
3388 mbmi->uv_mode = mode_uv[uv_tx];
3390 rate2 = rate_y + x->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3391 if (this_mode != DC_PRED && this_mode != TM_PRED)
3392 rate2 += intra_cost_penalty;
3393 distortion2 = distortion_y + distortion_uv;
3395 mbmi->mode = this_mode;
3396 compmode_cost = vp9_cost_bit(comp_mode_p, second_ref_frame > INTRA_FRAME);
3397 this_rd = handle_inter_mode(cpi, x, tile, bsize,
3399 &rate2, &distortion2, &skippable,
3400 &rate_y, &distortion_y,
3401 &rate_uv, &distortion_uv,
3402 &mode_excluded, &disable_skip,
3403 &tmp_best_filter, frame_mv,
3405 single_newmv, &total_sse, best_rd);
3406 if (this_rd == INT64_MAX)
3410 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3411 rate2 += compmode_cost;
3413 // Estimate the reference frame signaling cost and add it
3414 // to the rolling cost variable.
3415 if (second_ref_frame > INTRA_FRAME) {
3416 rate2 += ref_costs_comp[ref_frame];
3418 rate2 += ref_costs_single[ref_frame];
3421 if (!disable_skip) {
3422 // Test for the condition where skip block will be activated
3423 // because there are no non zero coefficients and make any
3424 // necessary adjustment for rate. Ignore if skip is coded at
3425 // segment level as the cost won't have been added in.
3426 // Is Mb level skip allowed (i.e. not coded at segment level).
3427 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3431 // Back out the coefficient coding costs
3432 rate2 -= (rate_y + rate_uv);
3433 // for best yrd calculation
3436 if (mb_skip_allowed) {
3439 // Cost the skip mb case
3440 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3442 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3443 rate2 += prob_skip_cost;
3446 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
3447 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3448 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3449 // Add in the cost of the no skip flag.
3450 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3452 // FIXME(rbultje) make this work for splitmv also
3453 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3454 distortion2 = total_sse;
3455 assert(total_sse >= 0);
3456 rate2 -= (rate_y + rate_uv);
3461 } else if (mb_skip_allowed) {
3462 // Add in the cost of the no skip flag.
3463 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3466 // Calculate the final RD estimate for this mode.
3467 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3470 // Keep record of best intra rd
3471 if (!is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3472 this_rd < best_intra_rd) {
3473 best_intra_rd = this_rd;
3474 best_intra_mode = xd->mi_8x8[0]->mbmi.mode;
3477 // Keep record of best inter rd with single reference
3478 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
3479 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
3480 !mode_excluded && this_rd < best_inter_rd) {
3481 best_inter_rd = this_rd;
3482 best_inter_ref_frame = ref_frame;
3485 if (!disable_skip && ref_frame == INTRA_FRAME) {
3486 for (i = 0; i < REFERENCE_MODES; ++i)
3487 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3488 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3489 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3492 // Store the respective mode distortions for later use.
3493 if (mode_distortions[this_mode] == -1
3494 || distortion2 < mode_distortions[this_mode]) {
3495 mode_distortions[this_mode] = distortion2;
3498 // Did this mode help.. i.e. is it the new best mode
3499 if (this_rd < best_rd || x->skip) {
3500 int max_plane = MAX_MB_PLANE;
3501 if (!mode_excluded) {
3502 // Note index of best mode so far
3503 best_mode_index = mode_index;
3505 if (ref_frame == INTRA_FRAME) {
3506 /* required for left and above block mv */
3507 mbmi->mv[0].as_int = 0;
3511 *returnrate = rate2;
3512 *returndistortion = distortion2;
3514 best_mbmode = *mbmi;
3515 best_skip2 = this_skip2;
3516 if (!x->select_txfm_size)
3517 swap_block_ptr(x, ctx, max_plane);
3518 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3519 sizeof(uint8_t) * ctx->num_4x4_blk);
3521 // TODO(debargha): enhance this test with a better distortion prediction
3522 // based on qp, activity mask and history
3523 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3524 (mode_index > MIN_EARLY_TERM_INDEX)) {
3525 const int qstep = xd->plane[0].dequant[1];
3526 // TODO(debargha): Enhance this by specializing for each mode_index
3528 if (x->source_variance < UINT_MAX) {
3529 const int var_adjust = (x->source_variance < 16);
3530 scale -= var_adjust;
3532 if (ref_frame > INTRA_FRAME &&
3533 distortion2 * scale < qstep * qstep) {
3540 /* keep record of best compound/single-only prediction */
3541 if (!disable_skip && ref_frame != INTRA_FRAME) {
3542 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3544 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3545 single_rate = rate2 - compmode_cost;
3546 hybrid_rate = rate2;
3548 single_rate = rate2;
3549 hybrid_rate = rate2 + compmode_cost;
3552 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3553 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3555 if (second_ref_frame <= INTRA_FRAME &&
3556 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3557 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3558 } else if (second_ref_frame > INTRA_FRAME &&
3559 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3560 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3562 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3563 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3566 /* keep record of best filter type */
3567 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
3568 cm->interp_filter != BILINEAR) {
3569 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
3570 SWITCHABLE_FILTERS : cm->interp_filter];
3572 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3574 if (ref == INT64_MAX)
3576 else if (cpi->rd_filter_cache[i] == INT64_MAX)
3577 // when early termination is triggered, the encoder does not have
3578 // access to the rate-distortion cost. it only knows that the cost
3579 // should be above the maximum valid value. hence it takes the known
3580 // maximum plus an arbitrary constant as the rate-distortion cost.
3581 adj_rd = cpi->mask_filter_rd - ref + 10;
3583 adj_rd = cpi->rd_filter_cache[i] - ref;
3586 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3590 /* keep record of best txfm size */
3591 if (bsize < BLOCK_32X32) {
3592 if (bsize < BLOCK_16X16)
3593 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3595 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3597 if (!mode_excluded && this_rd != INT64_MAX) {
3598 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3599 int64_t adj_rd = INT64_MAX;
3600 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3602 if (adj_rd < best_tx_rd[i])
3603 best_tx_rd[i] = adj_rd;
3610 if (x->skip && !comp_pred)
3614 if (best_rd >= best_rd_so_far)
3617 // If we used an estimate for the uv intra rd in the loop above...
3618 if (cpi->sf.use_uv_intra_rd_estimate) {
3619 // Do Intra UV best rd mode selection if best mode choice above was intra.
3620 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3622 *mbmi = best_mbmode;
3623 uv_tx_size = get_uv_tx_size(mbmi);
3624 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3625 &rate_uv_tokenonly[uv_tx_size],
3626 &dist_uv[uv_tx_size],
3627 &skip_uv[uv_tx_size],
3628 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3633 // Flag all modes that have a distortion that's > 2x the best we found at
3635 for (mode_index = 0; mode_index < MB_MODE_COUNT; ++mode_index) {
3636 if (mode_index == NEARESTMV || mode_index == NEARMV || mode_index == NEWMV)
3639 if (mode_distortions[mode_index] > 2 * *returndistortion) {
3640 ctx->modes_with_high_error |= (1 << mode_index);
3644 assert((cm->interp_filter == SWITCHABLE) ||
3645 (cm->interp_filter == best_mbmode.interp_filter) ||
3646 !is_inter_block(&best_mbmode));
3648 // Updating rd_thresh_freq_fact[] here means that the different
3649 // partition/block sizes are handled independently based on the best
3650 // choice for the current partition. It may well be better to keep a scaled
3651 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3652 // combination that wins out.
3653 if (cpi->sf.adaptive_rd_thresh) {
3654 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3655 if (mode_index == best_mode_index) {
3656 cpi->rd_thresh_freq_fact[bsize][mode_index] -=
3657 (cpi->rd_thresh_freq_fact[bsize][mode_index] >> 3);
3659 cpi->rd_thresh_freq_fact[bsize][mode_index] += RD_THRESH_INC;
3660 if (cpi->rd_thresh_freq_fact[bsize][mode_index] >
3661 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
3662 cpi->rd_thresh_freq_fact[bsize][mode_index] =
3663 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
3670 *mbmi = best_mbmode;
3671 x->skip |= best_skip2;
3673 for (i = 0; i < REFERENCE_MODES; ++i) {
3674 if (best_pred_rd[i] == INT64_MAX)
3675 best_pred_diff[i] = INT_MIN;
3677 best_pred_diff[i] = best_rd - best_pred_rd[i];
3681 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3682 if (best_filter_rd[i] == INT64_MAX)
3683 best_filter_diff[i] = 0;
3685 best_filter_diff[i] = best_rd - best_filter_rd[i];
3687 if (cm->interp_filter == SWITCHABLE)
3688 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3690 vp9_zero(best_filter_diff);
3694 for (i = 0; i < TX_MODES; i++) {
3695 if (best_tx_rd[i] == INT64_MAX)
3696 best_tx_diff[i] = 0;
3698 best_tx_diff[i] = best_rd - best_tx_rd[i];
3701 vp9_zero(best_tx_diff);
3704 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3705 store_coding_context(x, ctx, best_mode_index,
3706 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3707 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3708 mbmi->ref_frame[1]][0],
3709 best_pred_diff, best_tx_diff, best_filter_diff);
3715 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3716 const TileInfo *const tile,
3717 int mi_row, int mi_col,
3719 int64_t *returndistortion,
3721 PICK_MODE_CONTEXT *ctx,
3722 int64_t best_rd_so_far) {
3723 VP9_COMMON *cm = &cpi->common;
3724 MACROBLOCKD *xd = &x->e_mbd;
3725 MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
3726 const struct segmentation *seg = &cm->seg;
3727 const BLOCK_SIZE block_size = get_plane_block_size(bsize, &xd->plane[0]);
3728 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3729 unsigned char segment_id = mbmi->segment_id;
3731 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3732 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3733 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3735 int64_t best_rd = best_rd_so_far;
3736 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3737 int64_t best_tx_rd[TX_MODES];
3738 int64_t best_tx_diff[TX_MODES];
3739 int64_t best_pred_diff[REFERENCE_MODES];
3740 int64_t best_pred_rd[REFERENCE_MODES];
3741 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3742 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3743 MB_MODE_INFO best_mbmode = { 0 };
3744 int mode_index, best_mode_index = 0;
3745 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3746 vp9_prob comp_mode_p;
3747 int64_t best_inter_rd = INT64_MAX;
3748 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3749 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3750 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3751 int64_t dist_uv[TX_SIZES];
3752 int skip_uv[TX_SIZES];
3753 MB_PREDICTION_MODE mode_uv[TX_SIZES] = { 0 };
3754 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3755 int_mv seg_mvs[4][MAX_REF_FRAMES];
3756 b_mode_info best_bmodes[4];
3759 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3760 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3762 for (i = 0; i < 4; i++) {
3764 for (j = 0; j < MAX_REF_FRAMES; j++)
3765 seg_mvs[i][j].as_int = INVALID_MV;
3768 estimate_ref_frame_costs(cpi, segment_id, ref_costs_single, ref_costs_comp,
3771 for (i = 0; i < REFERENCE_MODES; ++i)
3772 best_pred_rd[i] = INT64_MAX;
3773 for (i = 0; i < TX_MODES; i++)
3774 best_tx_rd[i] = INT64_MAX;
3775 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3776 best_filter_rd[i] = INT64_MAX;
3777 for (i = 0; i < TX_SIZES; i++)
3778 rate_uv_intra[i] = INT_MAX;
3780 *returnrate = INT_MAX;
3782 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3783 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3784 vp9_setup_buffer_inter(cpi, x, tile,
3785 ref_frame, block_size, mi_row, mi_col,
3786 frame_mv[NEARESTMV], frame_mv[NEARMV],
3789 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3790 frame_mv[ZEROMV][ref_frame].as_int = 0;
3793 cpi->ref_frame_mask = 0;
3794 for (ref_frame = LAST_FRAME;
3795 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3797 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3798 if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3799 cpi->ref_frame_mask |= (1 << ref_frame);
3805 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
3806 int mode_excluded = 0;
3807 int64_t this_rd = INT64_MAX;
3808 int disable_skip = 0;
3809 int compmode_cost = 0;
3810 int rate2 = 0, rate_y = 0, rate_uv = 0;
3811 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3813 int64_t tx_cache[TX_MODES];
3816 int64_t total_sse = INT_MAX;
3819 for (i = 0; i < TX_MODES; ++i)
3820 tx_cache[i] = INT64_MAX;
3823 ref_frame = vp9_ref_order[mode_index].ref_frame[0];
3824 second_ref_frame = vp9_ref_order[mode_index].ref_frame[1];
3826 // Look at the reference frame of the best mode so far and set the
3827 // skip mask to look at a subset of the remaining modes.
3828 if (mode_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3829 if (mode_index == 3) {
3830 switch (vp9_ref_order[best_mode_index].ref_frame[0]) {
3832 cpi->mode_skip_mask = 0;
3835 cpi->mode_skip_mask = 0x0010;
3838 cpi->mode_skip_mask = 0x0008;
3841 cpi->mode_skip_mask = 0x0000;
3844 case MAX_REF_FRAMES:
3845 assert(0 && "Invalid Reference frame");
3848 if (cpi->mode_skip_mask & ((int64_t)1 << mode_index))
3852 // Test best rd so far against threshold for trying this mode.
3854 ((int64_t)cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] *
3855 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 5)) ||
3856 cpi->rd_thresh_sub8x8[segment_id][bsize][mode_index] == INT_MAX)
3859 // Do not allow compound prediction if the segment level reference
3860 // frame feature is in use as in this case there can only be one reference.
3861 if ((second_ref_frame > INTRA_FRAME) &&
3862 vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3865 mbmi->ref_frame[0] = ref_frame;
3866 mbmi->ref_frame[1] = second_ref_frame;
3868 if (!(ref_frame == INTRA_FRAME
3869 || (cpi->ref_frame_flags & flag_list[ref_frame]))) {
3872 if (!(second_ref_frame == NONE
3873 || (cpi->ref_frame_flags & flag_list[second_ref_frame]))) {
3877 comp_pred = second_ref_frame > INTRA_FRAME;
3879 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA)
3880 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3882 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH)
3883 if (ref_frame != best_inter_ref_frame &&
3884 second_ref_frame != best_inter_ref_frame)
3888 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3890 if (ref_frame > 0 && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3893 if (second_ref_frame > 0 &&
3894 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3897 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3898 mbmi->uv_mode = DC_PRED;
3900 // Evaluate all sub-pel filters irrespective of whether we can use
3901 // them for this frame.
3902 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3903 : cm->interp_filter;
3904 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
3907 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3910 mode_excluded = mode_excluded ? mode_excluded
3911 : cm->reference_mode == SINGLE_REFERENCE;
3913 if (ref_frame != INTRA_FRAME && second_ref_frame != INTRA_FRAME) {
3914 mode_excluded = mode_excluded ?
3915 mode_excluded : cm->reference_mode == COMPOUND_REFERENCE;
3919 // Select prediction reference frames.
3920 for (i = 0; i < MAX_MB_PLANE; i++) {
3921 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3923 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3926 // If the segment reference frame feature is enabled....
3927 // then do nothing if the current ref frame is not allowed..
3928 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3929 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3932 // If the segment skip feature is enabled....
3933 // then do nothing if the current mode is not allowed..
3934 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3935 ref_frame != INTRA_FRAME) {
3937 // Disable this drop out case if the ref frame
3938 // segment level feature is enabled for this segment. This is to
3939 // prevent the possibility that we end up unable to pick any mode.
3940 } else if (!vp9_segfeature_active(seg, segment_id,
3941 SEG_LVL_REF_FRAME)) {
3942 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3943 // unless ARNR filtering is enabled in which case we want
3944 // an unfiltered alternative. We allow near/nearest as well
3945 // because they may result in zero-zero MVs but be cheaper.
3946 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3950 #ifdef MODE_TEST_HIT_STATS
3952 // Keep a record of the number of test hits at each size
3953 cpi->mode_test_hits[bsize]++;
3956 if (ref_frame == INTRA_FRAME) {
3958 mbmi->tx_size = TX_4X4;
3959 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3960 &distortion_y, best_rd) >= best_rd)
3963 rate2 += intra_cost_penalty;
3964 distortion2 += distortion_y;
3966 if (rate_uv_intra[TX_4X4] == INT_MAX) {
3967 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3968 &rate_uv_intra[TX_4X4],
3969 &rate_uv_tokenonly[TX_4X4],
3970 &dist_uv[TX_4X4], &skip_uv[TX_4X4],
3973 rate2 += rate_uv_intra[TX_4X4];
3974 rate_uv = rate_uv_tokenonly[TX_4X4];
3975 distortion2 += dist_uv[TX_4X4];
3976 distortion_uv = dist_uv[TX_4X4];
3977 mbmi->uv_mode = mode_uv[TX_4X4];
3978 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3979 for (i = 0; i < TX_MODES; ++i)
3980 tx_cache[i] = tx_cache[ONLY_4X4];
3984 int64_t this_rd_thresh;
3985 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3986 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3987 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3988 int tmp_best_skippable = 0;
3989 int switchable_filter_index;
3990 int_mv *second_ref = comp_pred ?
3991 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3992 b_mode_info tmp_best_bmodes[16];
3993 MB_MODE_INFO tmp_best_mbmode;
3994 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3995 int pred_exists = 0;
3998 this_rd_thresh = (ref_frame == LAST_FRAME) ?
3999 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_LAST] :
4000 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_ALTR];
4001 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
4002 cpi->rd_thresh_sub8x8[segment_id][bsize][THR_GOLD] : this_rd_thresh;
4003 xd->mi_8x8[0]->mbmi.tx_size = TX_4X4;
4005 cpi->mask_filter_rd = 0;
4006 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
4007 cpi->rd_filter_cache[i] = INT64_MAX;
4009 if (cm->interp_filter != BILINEAR) {
4010 tmp_best_filter = EIGHTTAP;
4011 if (x->source_variance <
4012 cpi->sf.disable_filter_search_var_thresh) {
4013 tmp_best_filter = EIGHTTAP;
4014 } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
4015 ctx->pred_interp_filter < SWITCHABLE) {
4016 tmp_best_filter = ctx->pred_interp_filter;
4017 } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
4018 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
4019 ctx->pred_interp_filter : 0;
4021 for (switchable_filter_index = 0;
4022 switchable_filter_index < SWITCHABLE_FILTERS;
4023 ++switchable_filter_index) {
4026 mbmi->interp_filter = switchable_filter_index;
4027 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
4028 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4029 &mbmi->ref_mvs[ref_frame][0],
4032 &rate, &rate_y, &distortion,
4033 &skippable, &total_sse,
4034 (int)this_rd_thresh, seg_mvs,
4035 bsi, switchable_filter_index,
4038 if (tmp_rd == INT64_MAX)
4040 rs = get_switchable_rate(x);
4041 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4042 cpi->rd_filter_cache[switchable_filter_index] = tmp_rd;
4043 cpi->rd_filter_cache[SWITCHABLE_FILTERS] =
4044 MIN(cpi->rd_filter_cache[SWITCHABLE_FILTERS],
4046 if (cm->interp_filter == SWITCHABLE)
4049 cpi->mask_filter_rd = MAX(cpi->mask_filter_rd, tmp_rd);
4051 newbest = (tmp_rd < tmp_best_rd);
4053 tmp_best_filter = mbmi->interp_filter;
4054 tmp_best_rd = tmp_rd;
4056 if ((newbest && cm->interp_filter == SWITCHABLE) ||
4057 (mbmi->interp_filter == cm->interp_filter &&
4058 cm->interp_filter != SWITCHABLE)) {
4059 tmp_best_rdu = tmp_rd;
4060 tmp_best_rate = rate;
4061 tmp_best_ratey = rate_y;
4062 tmp_best_distortion = distortion;
4063 tmp_best_sse = total_sse;
4064 tmp_best_skippable = skippable;
4065 tmp_best_mbmode = *mbmi;
4066 for (i = 0; i < 4; i++) {
4067 tmp_best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4068 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4071 if (switchable_filter_index == 0 &&
4072 cpi->sf.use_rd_breakout &&
4073 best_rd < INT64_MAX) {
4074 if (tmp_best_rdu / 2 > best_rd) {
4075 // skip searching the other filters if the first is
4076 // already substantially larger than the best so far
4077 tmp_best_filter = mbmi->interp_filter;
4078 tmp_best_rdu = INT64_MAX;
4083 } // switchable_filter_index loop
4087 if (tmp_best_rdu == INT64_MAX && pred_exists)
4090 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
4091 tmp_best_filter : cm->interp_filter);
4092 xd->interp_kernel = vp9_get_interp_kernel(mbmi->interp_filter);
4094 // Handles the special case when a filter that is not in the
4095 // switchable list (bilinear, 6-tap) is indicated at the frame level
4096 tmp_rd = rd_pick_best_mbsegmentation(cpi, x, tile,
4097 &mbmi->ref_mvs[ref_frame][0],
4100 &rate, &rate_y, &distortion,
4101 &skippable, &total_sse,
4102 (int)this_rd_thresh, seg_mvs,
4105 if (tmp_rd == INT64_MAX)
4108 total_sse = tmp_best_sse;
4109 rate = tmp_best_rate;
4110 rate_y = tmp_best_ratey;
4111 distortion = tmp_best_distortion;
4112 skippable = tmp_best_skippable;
4113 *mbmi = tmp_best_mbmode;
4114 for (i = 0; i < 4; i++)
4115 xd->mi_8x8[0]->bmi[i] = tmp_best_bmodes[i];
4119 distortion2 += distortion;
4121 if (cm->interp_filter == SWITCHABLE)
4122 rate2 += get_switchable_rate(x);
4125 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4126 : cm->reference_mode == COMPOUND_REFERENCE;
4128 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4130 tmp_best_rdu = best_rd -
4131 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4132 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4134 if (tmp_best_rdu > 0) {
4135 // If even the 'Y' rd value of split is higher than best so far
4136 // then don't bother looking at UV
4137 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4139 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4140 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4141 if (rate_uv == INT_MAX)
4144 distortion2 += distortion_uv;
4145 skippable = skippable && uv_skippable;
4146 total_sse += uv_sse;
4148 tx_cache[ONLY_4X4] = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4149 for (i = 0; i < TX_MODES; ++i)
4150 tx_cache[i] = tx_cache[ONLY_4X4];
4154 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4155 rate2 += compmode_cost;
4157 // Estimate the reference frame signaling cost and add it
4158 // to the rolling cost variable.
4159 if (second_ref_frame > INTRA_FRAME) {
4160 rate2 += ref_costs_comp[ref_frame];
4162 rate2 += ref_costs_single[ref_frame];
4165 if (!disable_skip) {
4166 // Test for the condition where skip block will be activated
4167 // because there are no non zero coefficients and make any
4168 // necessary adjustment for rate. Ignore if skip is coded at
4169 // segment level as the cost won't have been added in.
4170 // Is Mb level skip allowed (i.e. not coded at segment level).
4171 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4174 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4175 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4176 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4177 // Add in the cost of the no skip flag.
4178 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4180 // FIXME(rbultje) make this work for splitmv also
4181 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4182 distortion2 = total_sse;
4183 assert(total_sse >= 0);
4184 rate2 -= (rate_y + rate_uv);
4189 } else if (mb_skip_allowed) {
4190 // Add in the cost of the no skip flag.
4191 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4194 // Calculate the final RD estimate for this mode.
4195 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4198 // Keep record of best inter rd with single reference
4199 if (is_inter_block(&xd->mi_8x8[0]->mbmi) &&
4200 !has_second_ref(&xd->mi_8x8[0]->mbmi) &&
4202 this_rd < best_inter_rd) {
4203 best_inter_rd = this_rd;
4204 best_inter_ref_frame = ref_frame;
4207 if (!disable_skip && ref_frame == INTRA_FRAME) {
4208 for (i = 0; i < REFERENCE_MODES; ++i)
4209 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4210 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4211 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4214 // Did this mode help, i.e., is it the new best mode so far?
4215 if (this_rd < best_rd || x->skip) {
4216 if (!mode_excluded) {
4217 int max_plane = MAX_MB_PLANE;
4218 // Note index of best mode so far
4219 best_mode_index = mode_index;
4221 if (ref_frame == INTRA_FRAME) {
4222 /* required for left and above block mv */
4223 mbmi->mv[0].as_int = 0;
4227 *returnrate = rate2;
4228 *returndistortion = distortion2;
4230 best_yrd = best_rd -
4231 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4232 best_mbmode = *mbmi;
4233 best_skip2 = this_skip2;
4234 if (!x->select_txfm_size)
4235 swap_block_ptr(x, ctx, max_plane);
4236 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
4237 sizeof(uint8_t) * ctx->num_4x4_blk);
4239 for (i = 0; i < 4; i++)
4240 best_bmodes[i] = xd->mi_8x8[0]->bmi[i];
4242 // TODO(debargha): enhance this test with a better distortion prediction
4243 // based on qp, activity mask and history
4244 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4245 (mode_index > MIN_EARLY_TERM_INDEX)) {
4246 const int qstep = xd->plane[0].dequant[1];
4247 // TODO(debargha): Enhance this by specializing for each mode_index
4249 if (x->source_variance < UINT_MAX) {
4250 const int var_adjust = (x->source_variance < 16);
4251 scale -= var_adjust;
4253 if (ref_frame > INTRA_FRAME &&
4254 distortion2 * scale < qstep * qstep) {
4261 /* keep record of best compound/single-only prediction */
4262 if (!disable_skip && ref_frame != INTRA_FRAME) {
4263 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4265 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4266 single_rate = rate2 - compmode_cost;
4267 hybrid_rate = rate2;
4269 single_rate = rate2;
4270 hybrid_rate = rate2 + compmode_cost;
4273 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4274 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4276 if (second_ref_frame <= INTRA_FRAME &&
4277 single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4278 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4279 } else if (second_ref_frame > INTRA_FRAME &&
4280 single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4281 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4283 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4284 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4287 /* keep record of best filter type */
4288 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4289 cm->interp_filter != BILINEAR) {
4290 int64_t ref = cpi->rd_filter_cache[cm->interp_filter == SWITCHABLE ?
4291 SWITCHABLE_FILTERS : cm->interp_filter];
4293 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4294 if (ref == INT64_MAX)
4296 else if (cpi->rd_filter_cache[i] == INT64_MAX)
4297 // When early termination is triggered, the encoder does not have
4298 // access to the rate-distortion cost. It only knows that the cost
4299 // should be above the maximum valid value. Hence it takes the known
4300 // maximum plus an arbitrary constant as the rate-distortion cost.
4301 adj_rd = cpi->mask_filter_rd - ref + 10;
4303 adj_rd = cpi->rd_filter_cache[i] - ref;
4306 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4310 /* keep record of best txfm size */
4311 if (bsize < BLOCK_32X32) {
4312 if (bsize < BLOCK_16X16) {
4313 tx_cache[ALLOW_8X8] = tx_cache[ONLY_4X4];
4314 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
4316 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
4318 if (!mode_excluded && this_rd != INT64_MAX) {
4319 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
4320 int64_t adj_rd = INT64_MAX;
4321 if (ref_frame > INTRA_FRAME)
4322 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
4326 if (adj_rd < best_tx_rd[i])
4327 best_tx_rd[i] = adj_rd;
4334 if (x->skip && !comp_pred)
4338 if (best_rd >= best_rd_so_far)
4341 // If we used an estimate for the uv intra rd in the loop above...
4342 if (cpi->sf.use_uv_intra_rd_estimate) {
4343 // Do Intra UV best rd mode selection if best mode choice above was intra.
4344 if (vp9_ref_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
4346 *mbmi = best_mbmode;
4347 uv_tx_size = get_uv_tx_size(mbmi);
4348 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
4349 &rate_uv_tokenonly[uv_tx_size],
4350 &dist_uv[uv_tx_size],
4351 &skip_uv[uv_tx_size],
4352 BLOCK_8X8, uv_tx_size);
4356 if (best_rd == INT64_MAX && bsize < BLOCK_8X8) {
4357 *returnrate = INT_MAX;
4358 *returndistortion = INT_MAX;
4362 assert((cm->interp_filter == SWITCHABLE) ||
4363 (cm->interp_filter == best_mbmode.interp_filter) ||
4364 !is_inter_block(&best_mbmode));
4366 // Updating rd_thresh_freq_fact[] here means that the different
4367 // partition/block sizes are handled independently based on the best
4368 // choice for the current partition. It may well be better to keep a scaled
4369 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
4370 // combination that wins out.
4371 if (cpi->sf.adaptive_rd_thresh) {
4372 for (mode_index = 0; mode_index < MAX_REFS; ++mode_index) {
4373 if (mode_index == best_mode_index) {
4374 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] -=
4375 (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >> 3);
4377 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] += RD_THRESH_INC;
4378 if (cpi->rd_thresh_freq_sub8x8[bsize][mode_index] >
4379 (cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT)) {
4380 cpi->rd_thresh_freq_sub8x8[bsize][mode_index] =
4381 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT;
4388 *mbmi = best_mbmode;
4389 x->skip |= best_skip2;
4390 if (!is_inter_block(&best_mbmode)) {
4391 for (i = 0; i < 4; i++)
4392 xd->mi_8x8[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4394 for (i = 0; i < 4; ++i)
4395 vpx_memcpy(&xd->mi_8x8[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4397 mbmi->mv[0].as_int = xd->mi_8x8[0]->bmi[3].as_mv[0].as_int;
4398 mbmi->mv[1].as_int = xd->mi_8x8[0]->bmi[3].as_mv[1].as_int;
4401 for (i = 0; i < REFERENCE_MODES; ++i) {
4402 if (best_pred_rd[i] == INT64_MAX)
4403 best_pred_diff[i] = INT_MIN;
4405 best_pred_diff[i] = best_rd - best_pred_rd[i];
4409 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4410 if (best_filter_rd[i] == INT64_MAX)
4411 best_filter_diff[i] = 0;
4413 best_filter_diff[i] = best_rd - best_filter_rd[i];
4415 if (cm->interp_filter == SWITCHABLE)
4416 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4418 vp9_zero(best_filter_diff);
4422 for (i = 0; i < TX_MODES; i++) {
4423 if (best_tx_rd[i] == INT64_MAX)
4424 best_tx_diff[i] = 0;
4426 best_tx_diff[i] = best_rd - best_tx_rd[i];
4429 vp9_zero(best_tx_diff);
4432 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4433 store_coding_context(x, ctx, best_mode_index,
4434 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4435 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4436 mbmi->ref_frame[1]][0],
4437 best_pred_diff, best_tx_diff, best_filter_diff);