/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
16 #include "./vp9_rtcd.h"
18 #include "vpx_mem/vpx_mem.h"
20 #include "vp9/common/vp9_common.h"
21 #include "vp9/common/vp9_entropy.h"
22 #include "vp9/common/vp9_entropymode.h"
23 #include "vp9/common/vp9_idct.h"
24 #include "vp9/common/vp9_mvref_common.h"
25 #include "vp9/common/vp9_pragmas.h"
26 #include "vp9/common/vp9_pred_common.h"
27 #include "vp9/common/vp9_quant_common.h"
28 #include "vp9/common/vp9_reconinter.h"
29 #include "vp9/common/vp9_reconintra.h"
30 #include "vp9/common/vp9_seg_common.h"
31 #include "vp9/common/vp9_systemdependent.h"
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rdopt.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 #include "vp9/encoder/vp9_variance.h"
44 #define RD_THRESH_MAX_FACT 64
45 #define RD_THRESH_INC 1
46 #define RD_THRESH_POW 1.25
47 #define RD_MULT_EPB_RATIO 64
49 /* Factor to weigh the rate for switchable interp filters */
50 #define SWITCHABLE_INTERP_RATE_FACTOR 1
52 #define LAST_FRAME_MODE_MASK 0xFFEDCD60
53 #define GOLDEN_FRAME_MODE_MASK 0xFFDA3BB0
54 #define ALT_REF_MODE_MASK 0xFFC648D0
56 #define MIN_EARLY_TERM_INDEX 3
60 MV_REFERENCE_FRAME ref_frame[2];
64 MV_REFERENCE_FRAME ref_frame[2];
67 struct rdcost_block_args {
69 ENTROPY_CONTEXT t_above[16];
70 ENTROPY_CONTEXT t_left[16];
80 int use_fast_coef_costing;
84 static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
85 {NEARESTMV, {LAST_FRAME, NONE}},
86 {NEARESTMV, {ALTREF_FRAME, NONE}},
87 {NEARESTMV, {GOLDEN_FRAME, NONE}},
89 {DC_PRED, {INTRA_FRAME, NONE}},
91 {NEWMV, {LAST_FRAME, NONE}},
92 {NEWMV, {ALTREF_FRAME, NONE}},
93 {NEWMV, {GOLDEN_FRAME, NONE}},
95 {NEARMV, {LAST_FRAME, NONE}},
96 {NEARMV, {ALTREF_FRAME, NONE}},
97 {NEARESTMV, {LAST_FRAME, ALTREF_FRAME}},
98 {NEARESTMV, {GOLDEN_FRAME, ALTREF_FRAME}},
100 {TM_PRED, {INTRA_FRAME, NONE}},
102 {NEARMV, {LAST_FRAME, ALTREF_FRAME}},
103 {NEWMV, {LAST_FRAME, ALTREF_FRAME}},
104 {NEARMV, {GOLDEN_FRAME, NONE}},
105 {NEARMV, {GOLDEN_FRAME, ALTREF_FRAME}},
106 {NEWMV, {GOLDEN_FRAME, ALTREF_FRAME}},
108 {ZEROMV, {LAST_FRAME, NONE}},
109 {ZEROMV, {GOLDEN_FRAME, NONE}},
110 {ZEROMV, {ALTREF_FRAME, NONE}},
111 {ZEROMV, {LAST_FRAME, ALTREF_FRAME}},
112 {ZEROMV, {GOLDEN_FRAME, ALTREF_FRAME}},
114 {H_PRED, {INTRA_FRAME, NONE}},
115 {V_PRED, {INTRA_FRAME, NONE}},
116 {D135_PRED, {INTRA_FRAME, NONE}},
117 {D207_PRED, {INTRA_FRAME, NONE}},
118 {D153_PRED, {INTRA_FRAME, NONE}},
119 {D63_PRED, {INTRA_FRAME, NONE}},
120 {D117_PRED, {INTRA_FRAME, NONE}},
121 {D45_PRED, {INTRA_FRAME, NONE}},
124 static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
125 {{LAST_FRAME, NONE}},
126 {{GOLDEN_FRAME, NONE}},
127 {{ALTREF_FRAME, NONE}},
128 {{LAST_FRAME, ALTREF_FRAME}},
129 {{GOLDEN_FRAME, ALTREF_FRAME}},
130 {{INTRA_FRAME, NONE}},
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
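// For illustration (derived from the comment above): a value of 4 is the
// 8x8 baseline (x1.0), 8 is x2.0 and 32 is x8.0. set_block_thresholds()
// below scales the per-mode thresh_mult values by these factors (together
// with a quantizer-dependent term) so that the breakout thresholds grow
// with block size.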
137 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
138 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
141 static int raster_block_offset(BLOCK_SIZE plane_bsize,
142 int raster_block, int stride) {
143 const int bw = b_width_log2(plane_bsize);
144 const int y = 4 * (raster_block >> bw);
145 const int x = 4 * (raster_block & ((1 << bw) - 1));
146 return y * stride + x;
148 static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
149 int raster_block, int16_t *base) {
150 const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
151 return base + raster_block_offset(plane_bsize, raster_block, stride);
154 static void fill_mode_costs(VP9_COMP *cpi) {
155 const FRAME_CONTEXT *const fc = &cpi->common.fc;
158 for (i = 0; i < INTRA_MODES; i++)
159 for (j = 0; j < INTRA_MODES; j++)
160 vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
161 vp9_intra_mode_tree);
163 // TODO(rbultje) separate tables for superblock costing?
164 vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
165 vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
166 vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
167 vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
168 fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
170 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
171 vp9_cost_tokens(cpi->switchable_interp_costs[i],
172 fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
175 static void fill_token_costs(vp9_coeff_cost *c,
176 vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
179 for (t = TX_4X4; t <= TX_32X32; ++t)
180 for (i = 0; i < PLANE_TYPES; ++i)
181 for (j = 0; j < REF_TYPES; ++j)
182 for (k = 0; k < COEF_BANDS; ++k)
183 for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
184 vp9_prob probs[ENTROPY_NODES];
185 vp9_model_to_full_probs(p[t][i][j][k][l], probs);
186 vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs,
188 vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
190 assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
191 c[t][i][j][k][1][l][EOB_TOKEN]);
195 static const uint8_t rd_iifactor[32] = {
196 4, 4, 3, 2, 1, 0, 0, 0,
197 0, 0, 0, 0, 0, 0, 0, 0,
198 0, 0, 0, 0, 0, 0, 0, 0,
199 0, 0, 0, 0, 0, 0, 0, 0,
202 // 3* dc_qlookup[Q]*dc_qlookup[Q];
204 /* values are now correlated to quantizer */
205 static int sad_per_bit16lut[QINDEX_RANGE];
206 static int sad_per_bit4lut[QINDEX_RANGE];
208 void vp9_init_me_luts() {
211 // Initialize the sad lut tables using a formulaic calculation for now
212 // This is to make it easier to resolve the impact of experimental changes
213 // to the quantizer tables.
214 for (i = 0; i < QINDEX_RANGE; i++) {
215 const double q = vp9_convert_qindex_to_q(i);
216 sad_per_bit16lut[i] = (int)(0.0418 * q + 2.4107);
217 sad_per_bit4lut[i] = (int)(0.063 * q + 2.742);
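// A rough sanity check of the two formulas above, at a hypothetical
// quantizer value of q = 100:
// sad_per_bit16lut = (int)(0.0418 * 100 + 2.4107) = 6 and
// sad_per_bit4lut = (int)(0.063 * 100 + 2.742) = 9.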
221 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
222 const int q = vp9_dc_quant(qindex, 0);
223 // TODO(debargha): Adjust the function below
224 int rdmult = 88 * q * q / 25;
225 if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
226 if (cpi->twopass.next_iiratio > 31)
227 rdmult += (rdmult * rd_iifactor[31]) >> 4;
229 rdmult += (rdmult * rd_iifactor[cpi->twopass.next_iiratio]) >> 4;
234 static int compute_rd_thresh_factor(int qindex) {
235 // TODO(debargha): Adjust the function below
236 const int q = (int)(pow(vp9_dc_quant(qindex, 0) / 4.0, RD_THRESH_POW) * 5.12);
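// A hypothetical example of the factor above: if vp9_dc_quant() returned
// 32, this would evaluate to (int)(pow(32 / 4.0, 1.25) * 5.12)
// = (int)(pow(8.0, 1.25) * 5.12), i.e. roughly 68.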
240 void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) {
241 cpi->mb.sadperbit16 = sad_per_bit16lut[qindex];
242 cpi->mb.sadperbit4 = sad_per_bit4lut[qindex];
245 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
246 int i, bsize, segment_id;
248 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
249 const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id,
250 cm->base_qindex) + cm->y_dc_delta_q,
252 const int q = compute_rd_thresh_factor(qindex);
254 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
255 // Threshold here seems unnecessarily harsh but fine given actual
256 // range of values used for cpi->sf.thresh_mult[].
257 const int t = q * rd_thresh_block_size_factor[bsize];
258 const int thresh_max = INT_MAX / t;
260 if (bsize >= BLOCK_8X8) {
261 for (i = 0; i < MAX_MODES; ++i)
262 rd->threshes[segment_id][bsize][i] =
263 rd->thresh_mult[i] < thresh_max
264 ? rd->thresh_mult[i] * t / 4
267 for (i = 0; i < MAX_REFS; ++i)
268 rd->threshes[segment_id][bsize][i] =
269 rd->thresh_mult_sub8x8[i] < thresh_max
270 ? rd->thresh_mult_sub8x8[i] * t / 4
277 void vp9_initialize_rd_consts(VP9_COMP *cpi) {
278 VP9_COMMON *const cm = &cpi->common;
279 MACROBLOCK *const x = &cpi->mb;
280 RD_OPT *const rd = &cpi->rd;
283 vp9_clear_system_state();
285 rd->RDDIV = RDDIV_BITS; // in bits (to multiply D by 128)
286 rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
288 x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
289 x->errorperbit += (x->errorperbit == 0);
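// A sketch of how these constants combine (assuming the usual RDCOST()
// macro form): rd = ((rate * RDMULT) >> 8) + (distortion << RDDIV), so
// RDMULT acts as the Lagrange multiplier and the RDDIV shift multiplies
// the distortion by 128 as noted above. errorperbit is simply
// RDMULT / RD_MULT_EPB_RATIO, used by the motion search cost functions.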
291 x->select_txfm_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL &&
292 cm->frame_type != KEY_FRAME) ? 0 : 1;
294 set_block_thresholds(cm, rd);
296 if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
297 fill_token_costs(x->token_costs, cm->fc.coef_probs);
299 for (i = 0; i < PARTITION_CONTEXTS; i++)
300 vp9_cost_tokens(x->partition_cost[i], get_partition_probs(cm, i),
304 if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
305 cm->frame_type == KEY_FRAME) {
306 fill_mode_costs(cpi);
308 if (!frame_is_intra_only(cm)) {
309 vp9_build_nmv_cost_table(x->nmvjointcost,
310 cm->allow_high_precision_mv ? x->nmvcost_hp
312 &cm->fc.nmvc, cm->allow_high_precision_mv);
314 for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
315 vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
316 cm->fc.inter_mode_probs[i], vp9_inter_mode_tree);
321 static const int MAX_XSQ_Q10 = 245727;
323 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
324 // NOTE: The tables below must be of the same size
326 // The functions described below are sampled at the four most significant
327 // bits of x^2 + 8 / 256
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
333 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
334 // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
335 // and H(x) is the binary entropy function.
336 static const int rate_tab_q10[] = {
337 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651,
338 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811,
339 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
340 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638,
341 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130,
342 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
343 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199,
344 1159, 1086, 1021, 963, 911, 864, 821, 781,
345 745, 680, 623, 574, 530, 490, 455, 424,
346 395, 345, 304, 269, 239, 213, 190, 171,
347 154, 126, 104, 87, 73, 61, 52, 44,
348 38, 28, 21, 16, 12, 10, 8, 6,
349 5, 3, 2, 1, 1, 1, 0, 0,
351 // Normalized distortion
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer with
// given stepsize. The closed form expression is:
355 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
356 // where x = qpstep / sqrt(variance)
357 // Note the actual distortion is Dn * variance.
358 static const int dist_tab_q10[] = {
359 0, 0, 1, 1, 1, 2, 2, 2,
360 3, 3, 4, 5, 5, 6, 7, 7,
361 8, 9, 11, 12, 13, 15, 16, 17,
362 18, 21, 24, 26, 29, 31, 34, 36,
363 39, 44, 49, 54, 59, 64, 69, 73,
364 78, 88, 97, 106, 115, 124, 133, 142,
365 151, 167, 184, 200, 215, 231, 245, 260,
366 274, 301, 327, 351, 375, 397, 418, 439,
367 458, 495, 528, 559, 587, 613, 637, 659,
368 680, 717, 749, 777, 801, 823, 842, 859,
369 874, 899, 919, 936, 949, 960, 969, 977,
370 983, 994, 1001, 1006, 1010, 1013, 1015, 1017,
371 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
373 static const int xsq_iq_q10[] = {
374 0, 4, 8, 12, 16, 20, 24, 28,
375 32, 40, 48, 56, 64, 72, 80, 88,
376 96, 112, 128, 144, 160, 176, 192, 208,
377 224, 256, 288, 320, 352, 384, 416, 448,
378 480, 544, 608, 672, 736, 800, 864, 928,
379 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888,
380 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808,
381 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648,
382 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
383 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688,
384 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408,
385 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848,
386 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
389 static const int tab_size = sizeof(rate_tab_q10) / sizeof(rate_tab_q10[0]);
390 assert(sizeof(dist_tab_q10) / sizeof(dist_tab_q10[0]) == tab_size);
391 assert(sizeof(xsq_iq_q10) / sizeof(xsq_iq_q10[0]) == tab_size);
392 assert(MAX_XSQ_Q10 + 1 == xsq_iq_q10[tab_size - 1]);
394 int tmp = (xsq_q10 >> 2) + 8;
395 int k = get_msb(tmp) - 3;
396 int xq = (k << 3) + ((tmp >> k) & 0x7);
397 const int one_q10 = 1 << 10;
398 const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
399 const int b_q10 = one_q10 - a_q10;
400 *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
401 *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
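// A worked example of the lookup above (assuming get_msb() returns the
// index of the highest set bit): for xsq_q10 = 1000, tmp = (1000 >> 2) + 8
// = 258, k = get_msb(258) - 3 = 5 and xq = (5 << 3) + ((258 >> 5) & 0x7)
// = 40, so the result is a linear interpolation between table entries 40
// and 41 (xsq_iq_q10[40] = 992, xsq_iq_q10[41] = 1120) with weight
// a_q10 = ((1000 - 992) << 10) >> 7 = 64 out of 1024.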
404 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
405 unsigned int qstep, int *rate,
407 // This function models the rate and distortion for a Laplacian
408 // source with given variance when quantized with a uniform quantizer
409 // with given stepsize. The closed form expressions are in:
410 // Hang and Chen, "Source Model for transform video coder and its
411 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
412 // Sys. for Video Tech., April 1997.
418 const uint64_t xsq_q10_64 =
419 ((((uint64_t)qstep * qstep * n) << 10) + (var >> 1)) / var;
420 const int xsq_q10 = xsq_q10_64 > MAX_XSQ_Q10 ?
421 MAX_XSQ_Q10 : (int)xsq_q10_64;
422 model_rd_norm(xsq_q10, &r_q10, &d_q10);
423 *rate = (n * r_q10 + 2) >> 2;
424 *dist = (var * (int64_t)d_q10 + 512) >> 10;
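// An illustrative (not normative) call: for a 4x4 block with var = 4096,
// n = 16 and qstep = 16, xsq_q10 = (((16 * 16 * 16) << 10) + 2048) / 4096
// = 1024, i.e. x^2 = 1.0 in Q10, which model_rd_norm() then maps to rate
// and distortion through the tables above.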
428 static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
429 MACROBLOCK *x, MACROBLOCKD *xd,
430 int *out_rate_sum, int64_t *out_dist_sum) {
// Note that our transform coefficients are 8 times an orthogonal
// transform. Hence the quantizer step is also scaled by 8, and to get the
// effective quantizer we need to divide by 8 before calling the modeling
// function.
435 int64_t rate_sum = 0;
436 int64_t dist_sum = 0;
437 const int ref = xd->mi[0]->mbmi.ref_frame[0];
440 for (i = 0; i < MAX_MB_PLANE; ++i) {
441 struct macroblock_plane *const p = &x->plane[i];
442 struct macroblockd_plane *const pd = &xd->plane[i];
443 const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
445 (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
446 pd->dst.buf, pd->dst.stride, &sse);
449 x->pred_sse[ref] = sse;
// Fast approximation of the modelling function.
452 if (cpi->oxcf.speed > 4) {
455 int64_t square_error = sse;
456 int quantizer = (pd->dequant[1] >> 3);
459 rate = (square_error * (280 - quantizer)) >> 8;
462 dist = (square_error * quantizer) >> 8;
468 vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs],
469 pd->dequant[1] >> 3, &rate, &dist);
475 *out_rate_sum = (int)rate_sum;
476 *out_dist_sum = dist_sum << 4;
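// Note (an interpretation, not from the original comments): the << 4
// appears to put the modelled distortion on the same scale as the
// coefficient-domain SSE produced by vp9_block_error() after the shifts
// applied in dist_block(), which is roughly 16x the pixel-domain SSE.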
479 static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize,
481 MACROBLOCK *x, MACROBLOCKD *xd,
482 int *out_rate_sum, int64_t *out_dist_sum,
486 const struct macroblock_plane *const p = &x->plane[0];
487 const struct macroblockd_plane *const pd = &xd->plane[0];
488 const int width = 4 * num_4x4_blocks_wide_lookup[bsize];
489 const int height = 4 * num_4x4_blocks_high_lookup[bsize];
491 int64_t dist_sum = 0;
492 const int t = 4 << tx_size;
494 if (tx_size == TX_4X4) {
496 } else if (tx_size == TX_8X8) {
498 } else if (tx_size == TX_16X16) {
500 } else if (tx_size == TX_32X32) {
507 for (j = 0; j < height; j += t) {
508 for (k = 0; k < width; k += t) {
512 cpi->fn_ptr[bs].vf(&p->src.buf[j * p->src.stride + k], p->src.stride,
513 &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride,
515 // sse works better than var, since there is no dc prediction used
516 vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3,
520 *out_skip &= (rate < 1024);
524 *out_rate_sum = rate_sum;
525 *out_dist_sum = dist_sum << 4;
528 int64_t vp9_block_error_c(const int16_t *coeff, const int16_t *dqcoeff,
529 intptr_t block_size, int64_t *ssz) {
531 int64_t error = 0, sqcoeff = 0;
533 for (i = 0; i < block_size; i++) {
534 const int diff = coeff[i] - dqcoeff[i];
535 error += diff * diff;
536 sqcoeff += coeff[i] * coeff[i];
/* The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * was non-zero). */
548 static const int16_t band_counts[TX_SIZES][8] = {
549 { 1, 2, 3, 4, 3, 16 - 13, 0 },
550 { 1, 2, 3, 4, 11, 64 - 21, 0 },
551 { 1, 2, 3, 4, 11, 256 - 21, 0 },
552 { 1, 2, 3, 4, 11, 1024 - 21, 0 },
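// As a quick consistency check of the table above: each row sums to the
// number of coefficients in the transform, e.g. 1 + 2 + 3 + 4 + 3 +
// (16 - 13) = 16 for TX_4X4 and 1 + 2 + 3 + 4 + 11 + (64 - 21) = 64 for
// TX_8X8; the final 0 is the terminator described in the comment above.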
554 static INLINE int cost_coeffs(MACROBLOCK *x,
555 int plane, int block,
556 ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L,
558 const int16_t *scan, const int16_t *nb,
559 int use_fast_coef_costing) {
560 MACROBLOCKD *const xd = &x->e_mbd;
561 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
562 const struct macroblock_plane *p = &x->plane[plane];
563 const struct macroblockd_plane *pd = &xd->plane[plane];
564 const PLANE_TYPE type = pd->plane_type;
565 const int16_t *band_count = &band_counts[tx_size][1];
566 const int eob = p->eobs[block];
567 const int16_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
568 unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
569 x->token_costs[tx_size][type][is_inter_block(mbmi)];
570 uint8_t token_cache[32 * 32];
571 int pt = combine_entropy_contexts(*A, *L);
573 // Check for consistency of tx_size with mode info
574 assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size
575 : get_uv_tx_size(mbmi) == tx_size);
579 cost = token_costs[0][0][pt][EOB_TOKEN];
582 int band_left = *band_count++;
586 int prev_t = vp9_dct_value_tokens_ptr[v].token;
587 cost = (*token_costs)[0][pt][prev_t] + vp9_dct_value_cost_ptr[v];
588 token_cache[0] = vp9_pt_energy_class[prev_t];
592 for (c = 1; c < eob; c++) {
593 const int rc = scan[c];
597 t = vp9_dct_value_tokens_ptr[v].token;
598 if (use_fast_coef_costing) {
599 cost += (*token_costs)[!prev_t][!prev_t][t] + vp9_dct_value_cost_ptr[v];
601 pt = get_coef_context(nb, token_cache, c);
602 cost += (*token_costs)[!prev_t][pt][t] + vp9_dct_value_cost_ptr[v];
603 token_cache[rc] = vp9_pt_energy_class[t];
607 band_left = *band_count++;
614 if (use_fast_coef_costing) {
615 cost += (*token_costs)[0][!prev_t][EOB_TOKEN];
617 pt = get_coef_context(nb, token_cache, c);
618 cost += (*token_costs)[0][pt][EOB_TOKEN];
// Is the eob the first coefficient?
628 static void dist_block(int plane, int block, TX_SIZE tx_size,
629 struct rdcost_block_args* args) {
630 const int ss_txfrm_size = tx_size << 1;
631 MACROBLOCK* const x = args->x;
632 MACROBLOCKD* const xd = &x->e_mbd;
633 const struct macroblock_plane *const p = &x->plane[plane];
634 const struct macroblockd_plane *const pd = &xd->plane[plane];
636 int shift = tx_size == TX_32X32 ? 0 : 2;
637 int16_t *const coeff = BLOCK_OFFSET(p->coeff, block);
638 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
639 args->dist = vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size,
641 args->sse = this_sse >> shift;
643 if (x->skip_encode && !is_inter_block(&xd->mi[0]->mbmi)) {
644 // TODO(jingning): tune the model to better capture the distortion.
645 int64_t p = (pd->dequant[1] * pd->dequant[1] *
646 (1 << ss_txfrm_size)) >> (shift + 2);
647 args->dist += (p >> 4);
652 static void rate_block(int plane, int block, BLOCK_SIZE plane_bsize,
653 TX_SIZE tx_size, struct rdcost_block_args* args) {
655 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx);
657 args->rate = cost_coeffs(args->x, plane, block, args->t_above + x_idx,
658 args->t_left + y_idx, tx_size,
659 args->so->scan, args->so->neighbors,
660 args->use_fast_coef_costing);
663 static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
664 TX_SIZE tx_size, void *arg) {
665 struct rdcost_block_args *args = arg;
666 MACROBLOCK *const x = args->x;
667 MACROBLOCKD *const xd = &x->e_mbd;
668 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
669 int64_t rd1, rd2, rd;
674 if (!is_inter_block(mbmi))
675 vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
677 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
679 dist_block(plane, block, tx_size, args);
680 rate_block(plane, block, plane_bsize, tx_size, args);
681 rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
682 rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
684 // TODO(jingning): temporarily enabled only for luma component
687 x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] ||
688 (rd1 > rd2 && !xd->lossless);
690 args->this_rate += args->rate;
691 args->this_dist += args->dist;
692 args->this_sse += args->sse;
695 if (args->this_rd > args->best_rd) {
701 void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
702 const struct macroblockd_plane *pd,
703 ENTROPY_CONTEXT t_above[16],
704 ENTROPY_CONTEXT t_left[16]) {
705 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
706 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
707 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
708 const ENTROPY_CONTEXT *const above = pd->above_context;
709 const ENTROPY_CONTEXT *const left = pd->left_context;
714 vpx_memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
715 vpx_memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
718 for (i = 0; i < num_4x4_w; i += 2)
719 t_above[i] = !!*(const uint16_t *)&above[i];
720 for (i = 0; i < num_4x4_h; i += 2)
721 t_left[i] = !!*(const uint16_t *)&left[i];
724 for (i = 0; i < num_4x4_w; i += 4)
725 t_above[i] = !!*(const uint32_t *)&above[i];
726 for (i = 0; i < num_4x4_h; i += 4)
727 t_left[i] = !!*(const uint32_t *)&left[i];
730 for (i = 0; i < num_4x4_w; i += 8)
731 t_above[i] = !!*(const uint64_t *)&above[i];
732 for (i = 0; i < num_4x4_h; i += 8)
733 t_left[i] = !!*(const uint64_t *)&left[i];
736 assert(0 && "Invalid transform size.");
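// Note on the wide loads above: each transform size collapses the 4x4
// entropy contexts it covers into a single flag. The uint16_t / uint32_t /
// uint64_t reads test 2, 4 or 8 adjacent ENTROPY_CONTEXT bytes at once,
// and the !! reduces the result to 0 or 1.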
740 static void txfm_rd_in_plane(MACROBLOCK *x,
741 int *rate, int64_t *distortion,
742 int *skippable, int64_t *sse,
743 int64_t ref_best_rd, int plane,
744 BLOCK_SIZE bsize, TX_SIZE tx_size,
int use_fast_coef_costing) {
746 MACROBLOCKD *const xd = &x->e_mbd;
747 const struct macroblockd_plane *const pd = &xd->plane[plane];
748 struct rdcost_block_args args = { 0 };
750 args.best_rd = ref_best_rd;
args.use_fast_coef_costing = use_fast_coef_costing;
754 xd->mi[0]->mbmi.tx_size = tx_size;
756 vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left);
758 args.so = get_scan(xd, tx_size, pd->plane_type, 0);
760 vp9_foreach_transformed_block_in_plane(xd, bsize, plane,
761 block_rd_txfm, &args);
764 *distortion = INT64_MAX;
768 *distortion = args.this_dist;
769 *rate = args.this_rate;
770 *sse = args.this_sse;
771 *skippable = vp9_is_skippable_in_plane(x, bsize, plane);
775 static void choose_largest_txfm_size(VP9_COMP *cpi, MACROBLOCK *x,
776 int *rate, int64_t *distortion,
777 int *skip, int64_t *sse,
780 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
781 VP9_COMMON *const cm = &cpi->common;
782 const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
783 MACROBLOCKD *const xd = &x->e_mbd;
784 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
786 mbmi->tx_size = MIN(max_tx_size, largest_tx_size);
788 txfm_rd_in_plane(x, rate, distortion, skip,
789 &sse[mbmi->tx_size], ref_best_rd, 0, bs,
790 mbmi->tx_size, cpi->sf.use_fast_coef_costing);
791 cpi->tx_stepdown_count[0]++;
794 static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x,
795 int (*r)[2], int *rate,
796 int64_t *d, int64_t *distortion,
798 int64_t tx_cache[TX_MODES],
800 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
801 VP9_COMMON *const cm = &cpi->common;
802 MACROBLOCKD *const xd = &x->e_mbd;
803 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
804 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
805 int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
806 {INT64_MAX, INT64_MAX},
807 {INT64_MAX, INT64_MAX},
808 {INT64_MAX, INT64_MAX}};
811 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
812 int64_t best_rd = INT64_MAX;
813 TX_SIZE best_tx = TX_4X4;
815 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
816 assert(skip_prob > 0);
817 s0 = vp9_cost_bit(skip_prob, 0);
818 s1 = vp9_cost_bit(skip_prob, 1);
820 for (n = TX_4X4; n <= max_tx_size; n++) {
822 if (r[n][0] < INT_MAX) {
823 for (m = 0; m <= n - (n == max_tx_size); m++) {
825 r[n][1] += vp9_cost_zero(tx_probs[m]);
827 r[n][1] += vp9_cost_one(tx_probs[m]);
830 if (d[n] == INT64_MAX) {
831 rd[n][0] = rd[n][1] = INT64_MAX;
833 rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, d[n]);
835 rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
836 rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
839 if (rd[n][1] < best_rd) {
844 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
845 best_tx : MIN(max_tx_size, max_mode_tx_size);
848 *distortion = d[mbmi->tx_size];
849 *rate = r[mbmi->tx_size][cm->tx_mode == TX_MODE_SELECT];
850 *skip = s[mbmi->tx_size];
852 tx_cache[ONLY_4X4] = rd[TX_4X4][0];
853 tx_cache[ALLOW_8X8] = rd[TX_8X8][0];
854 tx_cache[ALLOW_16X16] = rd[MIN(max_tx_size, TX_16X16)][0];
855 tx_cache[ALLOW_32X32] = rd[MIN(max_tx_size, TX_32X32)][0];
857 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
858 tx_cache[TX_MODE_SELECT] = rd[TX_32X32][1];
859 cpi->tx_stepdown_count[0]++;
860 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
861 tx_cache[TX_MODE_SELECT] = rd[TX_16X16][1];
862 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
863 } else if (rd[TX_8X8][1] < rd[TX_4X4][1]) {
864 tx_cache[TX_MODE_SELECT] = rd[TX_8X8][1];
865 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
867 tx_cache[TX_MODE_SELECT] = rd[TX_4X4][1];
868 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
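// An interpretation of the bookkeeping above: tx_stepdown_count[k] counts
// how often the selected transform size ends up k size-steps below the
// largest transform size available for the block (max_tx_size).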
872 static int64_t scaled_rd_cost(int rdmult, int rddiv,
873 int rate, int64_t dist, double scale) {
874 return (int64_t) (RDCOST(rdmult, rddiv, rate, dist) * scale);
877 static void choose_txfm_size_from_modelrd(VP9_COMP *cpi, MACROBLOCK *x,
878 int (*r)[2], int *rate,
879 int64_t *d, int64_t *distortion,
880 int *s, int *skip, int64_t *sse,
883 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
884 VP9_COMMON *const cm = &cpi->common;
885 MACROBLOCKD *const xd = &x->e_mbd;
886 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
887 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
888 int64_t rd[TX_SIZES][2] = {{INT64_MAX, INT64_MAX},
889 {INT64_MAX, INT64_MAX},
890 {INT64_MAX, INT64_MAX},
891 {INT64_MAX, INT64_MAX}};
894 double scale_rd[TX_SIZES] = {1.73, 1.44, 1.20, 1.00};
895 const TX_SIZE max_mode_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode];
896 int64_t best_rd = INT64_MAX;
897 TX_SIZE best_tx = TX_4X4;
899 const vp9_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc.tx_probs);
900 assert(skip_prob > 0);
901 s0 = vp9_cost_bit(skip_prob, 0);
902 s1 = vp9_cost_bit(skip_prob, 1);
904 for (n = TX_4X4; n <= max_tx_size; n++) {
905 double scale = scale_rd[n];
907 for (m = 0; m <= n - (n == max_tx_size); m++) {
909 r[n][1] += vp9_cost_zero(tx_probs[m]);
911 r[n][1] += vp9_cost_one(tx_probs[m]);
914 rd[n][0] = rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, s1, d[n],
917 rd[n][0] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][0] + s0, d[n],
919 rd[n][1] = scaled_rd_cost(x->rdmult, x->rddiv, r[n][1] + s0, d[n],
922 if (rd[n][1] < best_rd) {
928 mbmi->tx_size = cm->tx_mode == TX_MODE_SELECT ?
929 best_tx : MIN(max_tx_size, max_mode_tx_size);
931 // Actually encode using the chosen mode if a model was used, but do not
932 // update the r, d costs
933 txfm_rd_in_plane(x, rate, distortion, skip,
934 &sse[mbmi->tx_size], ref_best_rd, 0, bs, mbmi->tx_size,
935 cpi->sf.use_fast_coef_costing);
937 if (max_tx_size == TX_32X32 && best_tx == TX_32X32) {
938 cpi->tx_stepdown_count[0]++;
939 } else if (max_tx_size >= TX_16X16 && best_tx == TX_16X16) {
940 cpi->tx_stepdown_count[max_tx_size - TX_16X16]++;
941 } else if (rd[TX_8X8][1] <= rd[TX_4X4][1]) {
942 cpi->tx_stepdown_count[max_tx_size - TX_8X8]++;
944 cpi->tx_stepdown_count[max_tx_size - TX_4X4]++;
948 static void inter_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
949 int64_t *distortion, int *skip,
950 int64_t *psse, BLOCK_SIZE bs,
951 int64_t txfm_cache[TX_MODES],
952 int64_t ref_best_rd) {
953 int r[TX_SIZES][2], s[TX_SIZES];
954 int64_t d[TX_SIZES], sse[TX_SIZES];
955 MACROBLOCKD *xd = &x->e_mbd;
956 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
957 const TX_SIZE max_tx_size = max_txsize_lookup[bs];
960 assert(bs == mbmi->sb_type);
962 vp9_subtract_plane(x, bs, 0);
964 if (cpi->sf.tx_size_search_method == USE_LARGESTALL) {
965 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
966 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
969 *psse = sse[mbmi->tx_size];
973 if (cpi->sf.tx_size_search_method == USE_LARGESTINTRA_MODELINTER) {
974 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
975 model_rd_for_sb_y_tx(cpi, bs, tx_size, x, xd,
976 &r[tx_size][0], &d[tx_size], &s[tx_size]);
977 choose_txfm_size_from_modelrd(cpi, x, r, rate, d, distortion, s,
978 skip, sse, ref_best_rd, bs);
980 for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size)
981 txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
982 &s[tx_size], &sse[tx_size],
983 ref_best_rd, 0, bs, tx_size,
984 cpi->sf.use_fast_coef_costing);
985 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
986 skip, txfm_cache, bs);
989 *psse = sse[mbmi->tx_size];
992 static void intra_super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
993 int64_t *distortion, int *skip,
994 int64_t *psse, BLOCK_SIZE bs,
995 int64_t txfm_cache[TX_MODES],
996 int64_t ref_best_rd) {
997 int64_t sse[TX_SIZES];
998 MACROBLOCKD *xd = &x->e_mbd;
999 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1001 assert(bs == mbmi->sb_type);
1002 if (cpi->sf.tx_size_search_method != USE_FULL_RD) {
1003 vpx_memset(txfm_cache, 0, TX_MODES * sizeof(int64_t));
1004 choose_largest_txfm_size(cpi, x, rate, distortion, skip, sse,
1007 int r[TX_SIZES][2], s[TX_SIZES];
1008 int64_t d[TX_SIZES];
1010 for (tx_size = TX_4X4; tx_size <= max_txsize_lookup[bs]; ++tx_size)
1011 txfm_rd_in_plane(x, &r[tx_size][0], &d[tx_size],
1012 &s[tx_size], &sse[tx_size],
1013 ref_best_rd, 0, bs, tx_size,
1014 cpi->sf.use_fast_coef_costing);
1015 choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s,
1016 skip, txfm_cache, bs);
1019 *psse = sse[mbmi->tx_size];
1023 static int conditional_skipintra(PREDICTION_MODE mode,
1024 PREDICTION_MODE best_intra_mode) {
1025 if (mode == D117_PRED &&
1026 best_intra_mode != V_PRED &&
1027 best_intra_mode != D135_PRED)
1029 if (mode == D63_PRED &&
1030 best_intra_mode != V_PRED &&
1031 best_intra_mode != D45_PRED)
1033 if (mode == D207_PRED &&
1034 best_intra_mode != H_PRED &&
1035 best_intra_mode != D45_PRED)
1037 if (mode == D153_PRED &&
1038 best_intra_mode != H_PRED &&
1039 best_intra_mode != D135_PRED)
1044 static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
1045 PREDICTION_MODE *best_mode,
1046 const int *bmode_costs,
1047 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
1048 int *bestrate, int *bestratey,
1049 int64_t *bestdistortion,
1050 BLOCK_SIZE bsize, int64_t rd_thresh) {
1051 PREDICTION_MODE mode;
1052 MACROBLOCKD *const xd = &x->e_mbd;
1053 int64_t best_rd = rd_thresh;
1055 struct macroblock_plane *p = &x->plane[0];
1056 struct macroblockd_plane *pd = &xd->plane[0];
1057 const int src_stride = p->src.stride;
1058 const int dst_stride = pd->dst.stride;
1059 const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
1061 uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
1063 ENTROPY_CONTEXT ta[2], tempa[2];
1064 ENTROPY_CONTEXT tl[2], templ[2];
1066 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1067 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1069 uint8_t best_dst[8 * 8];
1073 vpx_memcpy(ta, a, sizeof(ta));
1074 vpx_memcpy(tl, l, sizeof(tl));
1075 xd->mi[0]->mbmi.tx_size = TX_4X4;
1077 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1080 int64_t distortion = 0;
1081 int rate = bmode_costs[mode];
1083 if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode)))
1086 // Only do the oblique modes if the best so far is
1087 // one of the neighboring directional modes
1088 if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
1089 if (conditional_skipintra(mode, *best_mode))
1093 vpx_memcpy(tempa, ta, sizeof(ta));
1094 vpx_memcpy(templ, tl, sizeof(tl));
1096 for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
1097 for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
1098 const int block = ib + idy * 2 + idx;
1099 const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
1100 uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
1101 int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
1103 int16_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
1104 xd->mi[0]->bmi[block].as_mode = mode;
1105 vp9_predict_intra_block(xd, block, 1,
1107 x->skip_encode ? src : dst,
1108 x->skip_encode ? src_stride : dst_stride,
1109 dst, dst_stride, idx, idy, 0);
1110 vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);
1113 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1114 vp9_fwht4x4(src_diff, coeff, 8);
1115 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1116 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1117 so->scan, so->neighbors,
1118 cpi->sf.use_fast_coef_costing);
1119 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1121 vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
1125 const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
1126 const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
1127 vp9_fht4x4(src_diff, coeff, 8, tx_type);
1128 vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
1129 ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4,
1130 so->scan, so->neighbors,
1131 cpi->sf.use_fast_coef_costing);
1132 distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
1134 if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
1136 vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
1137 dst, dst_stride, p->eobs[block]);
1143 this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
1145 if (this_rd < best_rd) {
1148 *bestdistortion = distortion;
1151 vpx_memcpy(a, tempa, sizeof(tempa));
1152 vpx_memcpy(l, templ, sizeof(templ));
1153 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1154 vpx_memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
1155 num_4x4_blocks_wide * 4);
1161 if (best_rd >= rd_thresh || x->skip_encode)
1164 for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
1165 vpx_memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
1166 num_4x4_blocks_wide * 4);
1171 static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
1172 int *rate, int *rate_y,
1173 int64_t *distortion,
1176 const MACROBLOCKD *const xd = &mb->e_mbd;
1177 MODE_INFO *const mic = xd->mi[0];
1178 const MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1179 const MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1180 const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
1181 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1182 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1185 int64_t total_distortion = 0;
1187 int64_t total_rd = 0;
1188 ENTROPY_CONTEXT t_above[4], t_left[4];
1189 const int *bmode_costs = cpi->mbmode_cost;
1191 vpx_memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
1192 vpx_memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
1194 // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
1195 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1196 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1197 PREDICTION_MODE best_mode = DC_PRED;
1198 int r = INT_MAX, ry = INT_MAX;
1199 int64_t d = INT64_MAX, this_rd = INT64_MAX;
1201 if (cpi->common.frame_type == KEY_FRAME) {
1202 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i);
1203 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i);
1205 bmode_costs = cpi->y_mode_costs[A][L];
1208 this_rd = rd_pick_intra4x4block(cpi, mb, i, &best_mode, bmode_costs,
1209 t_above + idx, t_left + idy, &r, &ry, &d,
1210 bsize, best_rd - total_rd);
1211 if (this_rd >= best_rd - total_rd)
1214 total_rd += this_rd;
1216 total_distortion += d;
1219 mic->bmi[i].as_mode = best_mode;
1220 for (j = 1; j < num_4x4_blocks_high; ++j)
1221 mic->bmi[i + j * 2].as_mode = best_mode;
1222 for (j = 1; j < num_4x4_blocks_wide; ++j)
1223 mic->bmi[i + j].as_mode = best_mode;
1225 if (total_rd >= best_rd)
1231 *rate_y = tot_rate_y;
1232 *distortion = total_distortion;
1233 mic->mbmi.mode = mic->bmi[3].as_mode;
1235 return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion);
1238 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x,
1239 int *rate, int *rate_tokenonly,
1240 int64_t *distortion, int *skippable,
1242 int64_t tx_cache[TX_MODES],
1244 PREDICTION_MODE mode;
1245 PREDICTION_MODE mode_selected = DC_PRED;
1246 MACROBLOCKD *const xd = &x->e_mbd;
1247 MODE_INFO *const mic = xd->mi[0];
1248 int this_rate, this_rate_tokenonly, s;
1249 int64_t this_distortion, this_rd;
1250 TX_SIZE best_tx = TX_4X4;
1252 int *bmode_costs = cpi->mbmode_cost;
1254 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
1255 for (i = 0; i < TX_MODES; i++)
1256 tx_cache[i] = INT64_MAX;
1258 /* Y Search for intra prediction mode */
1259 for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1260 int64_t local_tx_cache[TX_MODES];
1261 MODE_INFO *above_mi = xd->mi[-xd->mi_stride];
1262 MODE_INFO *left_mi = xd->left_available ? xd->mi[-1] : NULL;
1264 if (!(cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]] & (1 << mode)))
1267 if (cpi->common.frame_type == KEY_FRAME) {
1268 const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1269 const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1271 bmode_costs = cpi->y_mode_costs[A][L];
1273 mic->mbmi.mode = mode;
1275 intra_super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion,
1276 &s, NULL, bsize, local_tx_cache, best_rd);
1278 if (this_rate_tokenonly == INT_MAX)
1281 this_rate = this_rate_tokenonly + bmode_costs[mode];
1282 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1284 if (this_rd < best_rd) {
1285 mode_selected = mode;
1287 best_tx = mic->mbmi.tx_size;
1289 *rate_tokenonly = this_rate_tokenonly;
1290 *distortion = this_distortion;
1294 if (cpi->sf.tx_size_search_method == USE_FULL_RD && this_rd < INT64_MAX) {
1295 for (i = 0; i < TX_MODES && local_tx_cache[i] < INT64_MAX; i++) {
1296 const int64_t adj_rd = this_rd + local_tx_cache[i] -
1297 local_tx_cache[cpi->common.tx_mode];
1298 if (adj_rd < tx_cache[i]) {
1299 tx_cache[i] = adj_rd;
1305 mic->mbmi.mode = mode_selected;
1306 mic->mbmi.tx_size = best_tx;
1311 static void super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x,
1312 int *rate, int64_t *distortion, int *skippable,
1313 int64_t *sse, BLOCK_SIZE bsize,
1314 int64_t ref_best_rd) {
1315 MACROBLOCKD *const xd = &x->e_mbd;
1316 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
1317 TX_SIZE uv_txfm_size = get_uv_tx_size(mbmi);
1319 int pnrate = 0, pnskip = 1;
1320 int64_t pndist = 0, pnsse = 0;
1322 if (ref_best_rd < 0)
1325 if (is_inter_block(mbmi)) {
1327 for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1328 vp9_subtract_plane(x, bsize, plane);
1336 for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1337 txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse,
1338 ref_best_rd, plane, bsize, uv_txfm_size,
1339 cpi->sf.use_fast_coef_costing);
1340 if (pnrate == INT_MAX)
1343 *distortion += pndist;
1345 *skippable &= pnskip;
1351 *distortion = INT64_MAX;
1357 static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
1358 PICK_MODE_CONTEXT *ctx,
1359 int *rate, int *rate_tokenonly,
1360 int64_t *distortion, int *skippable,
1361 BLOCK_SIZE bsize, TX_SIZE max_tx_size) {
1362 MACROBLOCKD *xd = &x->e_mbd;
1363 PREDICTION_MODE mode;
1364 PREDICTION_MODE mode_selected = DC_PRED;
1365 int64_t best_rd = INT64_MAX, this_rd;
1366 int this_rate_tokenonly, this_rate, s;
1367 int64_t this_distortion, this_sse;
1369 for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
1370 if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode)))
1373 xd->mi[0]->mbmi.uv_mode = mode;
1375 super_block_uvrd(cpi, x, &this_rate_tokenonly,
1376 &this_distortion, &s, &this_sse, bsize, best_rd);
1377 if (this_rate_tokenonly == INT_MAX)
1379 this_rate = this_rate_tokenonly +
1380 cpi->intra_uv_mode_cost[cpi->common.frame_type][mode];
1381 this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1383 if (this_rd < best_rd) {
1384 mode_selected = mode;
1387 *rate_tokenonly = this_rate_tokenonly;
1388 *distortion = this_distortion;
1390 if (!x->select_txfm_size) {
1392 struct macroblock_plane *const p = x->plane;
1393 struct macroblockd_plane *const pd = xd->plane;
1394 for (i = 1; i < MAX_MB_PLANE; ++i) {
1395 p[i].coeff = ctx->coeff_pbuf[i][2];
1396 p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
1397 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
1398 p[i].eobs = ctx->eobs_pbuf[i][2];
1400 ctx->coeff_pbuf[i][2] = ctx->coeff_pbuf[i][0];
1401 ctx->qcoeff_pbuf[i][2] = ctx->qcoeff_pbuf[i][0];
1402 ctx->dqcoeff_pbuf[i][2] = ctx->dqcoeff_pbuf[i][0];
1403 ctx->eobs_pbuf[i][2] = ctx->eobs_pbuf[i][0];
1405 ctx->coeff_pbuf[i][0] = p[i].coeff;
1406 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
1407 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
1408 ctx->eobs_pbuf[i][0] = p[i].eobs;
1414 xd->mi[0]->mbmi.uv_mode = mode_selected;
1418 static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x,
1419 int *rate, int *rate_tokenonly,
1420 int64_t *distortion, int *skippable,
1422 const VP9_COMMON *cm = &cpi->common;
1425 x->e_mbd.mi[0]->mbmi.uv_mode = DC_PRED;
1426 super_block_uvrd(cpi, x, rate_tokenonly, distortion,
1427 skippable, &unused, bsize, INT64_MAX);
1428 *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][DC_PRED];
1429 return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
1432 static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
1433 BLOCK_SIZE bsize, TX_SIZE max_tx_size,
1434 int *rate_uv, int *rate_uv_tokenonly,
1435 int64_t *dist_uv, int *skip_uv,
1436 PREDICTION_MODE *mode_uv) {
1437 MACROBLOCK *const x = &cpi->mb;
1439 // Use an estimated rd for uv_intra based on DC_PRED if the
1440 // appropriate speed flag is set.
1441 if (cpi->sf.use_uv_intra_rd_estimate) {
1442 rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv,
1443 skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize);
1444 // Else do a proper rd search for each possible transform size that may
1445 // be considered in the main rd loop.
1447 rd_pick_intra_sbuv_mode(cpi, x, ctx,
1448 rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
1449 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size);
1451 *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
1454 static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
1456 const MACROBLOCK *const x = &cpi->mb;
1457 const int segment_id = x->e_mbd.mi[0]->mbmi.segment_id;
1459 // Don't account for mode here if segment skip is enabled.
1460 if (!vp9_segfeature_active(&cpi->common.seg, segment_id, SEG_LVL_SKIP)) {
1461 assert(is_inter_mode(mode));
1462 return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)];
1468 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
1471 int mi_row, int mi_col,
1472 int_mv single_newmv[MAX_REF_FRAMES],
1475 static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCKD *xd, int i,
1476 PREDICTION_MODE mode, int_mv this_mv[2],
1477 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1478 int_mv seg_mvs[MAX_REF_FRAMES],
1479 int_mv *best_ref_mv[2], const int *mvjcost,
1481 MODE_INFO *const mic = xd->mi[0];
1482 const MB_MODE_INFO *const mbmi = &mic->mbmi;
1485 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type];
1486 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type];
1487 const int is_compound = has_second_ref(mbmi);
1491 this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int;
1492 thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv,
1493 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1495 this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int;
1496 thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv,
1497 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
1502 this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int;
1504 this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int;
1507 this_mv[0].as_int = 0;
1509 this_mv[1].as_int = 0;
1515 mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
1517 mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int;
1519 mic->bmi[i].as_mode = mode;
1521 for (idy = 0; idy < num_4x4_blocks_high; ++idy)
1522 for (idx = 0; idx < num_4x4_blocks_wide; ++idx)
1523 vpx_memcpy(&mic->bmi[i + idy * 2 + idx],
1524 &mic->bmi[i], sizeof(mic->bmi[i]));
1526 return cost_mv_ref(cpi, mode, mbmi->mode_context[mbmi->ref_frame[0]]) +
1530 static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
1535 int64_t *distortion, int64_t *sse,
1536 ENTROPY_CONTEXT *ta,
1537 ENTROPY_CONTEXT *tl,
1538 int mi_row, int mi_col) {
1540 MACROBLOCKD *xd = &x->e_mbd;
1541 struct macroblockd_plane *const pd = &xd->plane[0];
1542 struct macroblock_plane *const p = &x->plane[0];
1543 MODE_INFO *const mi = xd->mi[0];
1544 const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd);
1545 const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
1546 const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
1549 const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
1551 uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
1553 int64_t thisdistortion = 0, thissse = 0;
1554 int thisrate = 0, ref;
1555 const scan_order *so = &vp9_default_scan_orders[TX_4X4];
1556 const int is_compound = has_second_ref(&mi->mbmi);
1557 const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
1559 for (ref = 0; ref < 1 + is_compound; ++ref) {
1560 const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
1561 pd->pre[ref].stride)];
1562 vp9_build_inter_predictor(pre, pd->pre[ref].stride,
1563 dst, pd->dst.stride,
1564 &mi->bmi[i].as_mv[ref].as_mv,
1565 &xd->block_refs[ref]->sf, width, height, ref,
1566 kernel, MV_PRECISION_Q3,
1567 mi_col * MI_SIZE + 4 * (i % 2),
1568 mi_row * MI_SIZE + 4 * (i / 2));
1571 vp9_subtract_block(height, width,
1572 raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
1574 dst, pd->dst.stride);
1577 for (idy = 0; idy < height / 4; ++idy) {
1578 for (idx = 0; idx < width / 4; ++idx) {
1579 int64_t ssz, rd, rd1, rd2;
1582 k += (idy * 2 + idx);
1583 coeff = BLOCK_OFFSET(p->coeff, k);
1584 x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
1586 vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
1587 thisdistortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k),
1590 thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4,
1591 so->scan, so->neighbors,
1592 cpi->sf.use_fast_coef_costing);
1593 rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
1594 rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
1601 *distortion = thisdistortion >> 2;
1602 *labelyrate = thisrate;
1603 *sse = thissse >> 2;
1605 return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
1616 ENTROPY_CONTEXT ta[2];
1617 ENTROPY_CONTEXT tl[2];
1629 PREDICTION_MODE modes[4];
1630 SEG_RDSTAT rdstat[4][INTER_MODES];
1634 static INLINE int mv_check_bounds(const MACROBLOCK *x, const MV *mv) {
1635 return (mv->row >> 3) < x->mv_row_min ||
1636 (mv->row >> 3) > x->mv_row_max ||
1637 (mv->col >> 3) < x->mv_col_min ||
1638 (mv->col >> 3) > x->mv_col_max;
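// Note: MV components are stored in 1/8-pel units, so the >> 3 above
// converts them to full-pel positions before comparing against the
// full-pel search-range limits in the MACROBLOCK.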
1641 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1642 MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi;
1643 struct macroblock_plane *const p = &x->plane[0];
1644 struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1646 p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1647 assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1648 pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
1649 pd->pre[0].stride)];
1650 if (has_second_ref(mbmi))
1651 pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
1652 pd->pre[1].stride)];
1655 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1656 struct buf_2d orig_pre[2]) {
1657 MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi;
1658 x->plane[0].src = orig_src;
1659 x->e_mbd.plane[0].pre[0] = orig_pre[0];
1660 if (has_second_ref(mbmi))
1661 x->e_mbd.plane[0].pre[1] = orig_pre[1];
1664 static INLINE int mv_has_subpel(const MV *mv) {
1665 return (mv->row & 0x0F) || (mv->col & 0x0F);
// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
// TODO(aconverse): Find out if this is still productive, then clean up or
// remove it.
1670 static int check_best_zero_mv(
1671 const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES],
1672 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1673 int disable_inter_mode_mask, int this_mode,
1674 const MV_REFERENCE_FRAME ref_frames[2]) {
1675 if (!(disable_inter_mode_mask & (1 << INTER_OFFSET(ZEROMV))) &&
1676 (this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1677 frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1678 (ref_frames[1] == NONE ||
1679 frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1680 int rfc = mode_context[ref_frames[0]];
1681 int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1682 int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1683 int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1685 if (this_mode == NEARMV) {
1686 if (c1 > c3) return 0;
1687 } else if (this_mode == NEARESTMV) {
1688 if (c2 > c3) return 0;
1690 assert(this_mode == ZEROMV);
1691 if (ref_frames[1] == NONE) {
1692 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1693 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1696 if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1697 frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1698 (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1699 frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1707 static int64_t rd_pick_best_sub8x8_mode(VP9_COMP *cpi, MACROBLOCK *x,
1708 const TileInfo * const tile,
1709 int_mv *best_ref_mv,
1710 int_mv *second_best_ref_mv,
1711 int64_t best_rd, int *returntotrate,
1713 int64_t *returndistortion,
1714 int *skippable, int64_t *psse,
1716 int_mv seg_mvs[4][MAX_REF_FRAMES],
1717 BEST_SEG_INFO *bsi_buf, int filter_idx,
1718 int mi_row, int mi_col) {
1720 BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1721 MACROBLOCKD *xd = &x->e_mbd;
1722 MODE_INFO *mi = xd->mi[0];
1723 MB_MODE_INFO *mbmi = &mi->mbmi;
1725 int k, br = 0, idx, idy;
1726 int64_t bd = 0, block_sse = 0;
1727 PREDICTION_MODE this_mode;
1728 VP9_COMMON *cm = &cpi->common;
1729 struct macroblock_plane *const p = &x->plane[0];
1730 struct macroblockd_plane *const pd = &xd->plane[0];
1731 const int label_count = 4;
1732 int64_t this_segment_rd = 0;
1733 int label_mv_thresh;
1734 int segmentyrate = 0;
1735 const BLOCK_SIZE bsize = mbmi->sb_type;
1736 const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1737 const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1738 ENTROPY_CONTEXT t_above[2], t_left[2];
1739 int subpelmv = 1, have_ref = 0;
1740 const int has_second_rf = has_second_ref(mbmi);
1741 const int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
1745 bsi->segment_rd = best_rd;
1746 bsi->ref_mv[0] = best_ref_mv;
1747 bsi->ref_mv[1] = second_best_ref_mv;
1748 bsi->mvp.as_int = best_ref_mv->as_int;
1749 bsi->mvthresh = mvthresh;
1751 for (i = 0; i < 4; i++)
1752 bsi->modes[i] = ZEROMV;
1754 vpx_memcpy(t_above, pd->above_context, sizeof(t_above));
1755 vpx_memcpy(t_left, pd->left_context, sizeof(t_left));
// A factor of 64 would make this threshold effectively so large that we
// would very rarely check mvs on segments; setting the factor to 1 makes
// the mv threshold roughly equal to what it is for macroblocks.
1761 label_mv_thresh = 1 * bsi->mvthresh / label_count;
1763 // Segmentation method overheads
1764 for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1765 for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
// TODO(jingning,rbultje): Rewrite the rate-distortion optimization loop
// for 4x4/4x8/8x4 block coding; to be replaced with a new rd loop.
1768 int_mv mode_mv[MB_MODE_COUNT][2];
1769 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1770 PREDICTION_MODE mode_selected = ZEROMV;
1771 int64_t best_rd = INT64_MAX;
1772 const int i = idy * 2 + idx;
1775 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1776 const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref];
1777 frame_mv[ZEROMV][frame].as_int = 0;
1778 vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, i, ref, mi_row, mi_col,
1779 &frame_mv[NEARESTMV][frame],
1780 &frame_mv[NEARMV][frame]);
1783 // search for the best motion vector on this segment
1784 for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1785 const struct buf_2d orig_src = x->plane[0].src;
1786 struct buf_2d orig_pre[2];
1788 mode_idx = INTER_OFFSET(this_mode);
1789 bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1790 if (disable_inter_mode_mask & (1 << mode_idx))
1793 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
1794 disable_inter_mode_mask,
1795 this_mode, mbmi->ref_frame))
1798 vpx_memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1799 vpx_memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1800 sizeof(bsi->rdstat[i][mode_idx].ta));
1801 vpx_memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1802 sizeof(bsi->rdstat[i][mode_idx].tl));
1804 // motion search for newmv (single predictor case only)
1805 if (!has_second_rf && this_mode == NEWMV &&
1806 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV) {
1807 MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1809 int thissme, bestsme = INT_MAX;
1810 int sadpb = x->sadperbit4;
/* Is the best so far sufficiently good that we can't justify doing
 * a new motion search? */
1816 if (best_rd < label_mv_thresh)
1819 if (!is_best_mode(cpi->oxcf.mode)) {
// Use the previous block's result as the next block's MV predictor.
1822 bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1824 bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1828 max_mv = x->max_mv_context[mbmi->ref_frame[0]];
1830 max_mv = MAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1832 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
// Take a weighted average of the step_params based on the last frame's
// max mv magnitude and the best ref mvs of the current block for
// the given reference.
1836 step_param = (vp9_init_search_range(&cpi->sf, max_mv) +
1837 cpi->mv_step_param) / 2;
1839 step_param = cpi->mv_step_param;
1842 mvp_full.row = bsi->mvp.as_mv.row >> 3;
1843 mvp_full.col = bsi->mvp.as_mv.col >> 3;
1845 if (cpi->sf.adaptive_motion_search && cm->show_frame) {
1846 mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].as_mv.row >> 3;
1847 mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].as_mv.col >> 3;
1848 step_param = MAX(step_param, 8);
1851 // adjust src pointer for this block
1854 vp9_set_mv_search_range(x, &bsi->ref_mv[0]->as_mv);
1856 bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param,
1857 sadpb, &bsi->ref_mv[0]->as_mv, new_mv,
// Should we do a full search (best quality only)?
1861 if (is_best_mode(cpi->oxcf.mode)) {
1862 int_mv *const best_mv = &mi->bmi[i].as_mv[0];
1863 /* Check if mvp_full is within the range. */
1864 clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max,
1865 x->mv_row_min, x->mv_row_max);
1866 thissme = cpi->full_search_sad(x, &mvp_full,
1867 sadpb, 16, &cpi->fn_ptr[bsize],
1868 &bsi->ref_mv[0]->as_mv,
1870 if (thissme < bestsme) {
1872 *new_mv = best_mv->as_mv;
// The full search result is actually worse, so reinstate the
// previous best vector.
1876 best_mv->as_mv = *new_mv;
1880 if (bestsme < INT_MAX) {
1882 cpi->find_fractional_mv_step(x,
1884 &bsi->ref_mv[0]->as_mv,
1885 cm->allow_high_precision_mv,
1886 x->errorperbit, &cpi->fn_ptr[bsize],
1887 cpi->sf.subpel_force_stop,
1888 cpi->sf.subpel_iters_per_step,
1889 x->nmvjointcost, x->mvcost,
1891 &x->pred_sse[mbmi->ref_frame[0]]);
1893 // save motion search result for use in compound prediction
1894 seg_mvs[i][mbmi->ref_frame[0]].as_mv = *new_mv;
1897 if (cpi->sf.adaptive_motion_search)
1898 x->pred_mv[mbmi->ref_frame[0]].as_mv = *new_mv;
1900 // restore src pointers
1901 mi_buf_restore(x, orig_src, orig_pre);
1904 if (has_second_rf) {
1905 if (seg_mvs[i][mbmi->ref_frame[1]].as_int == INVALID_MV ||
1906 seg_mvs[i][mbmi->ref_frame[0]].as_int == INVALID_MV)
1910 if (has_second_rf && this_mode == NEWMV &&
1911 mbmi->interp_filter == EIGHTTAP) {
1912 // adjust src pointers
1914 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
1916 joint_motion_search(cpi, x, bsize, frame_mv[this_mode],
1917 mi_row, mi_col, seg_mvs[i],
1919 seg_mvs[i][mbmi->ref_frame[0]].as_int =
1920 frame_mv[this_mode][mbmi->ref_frame[0]].as_int;
1921 seg_mvs[i][mbmi->ref_frame[1]].as_int =
1922 frame_mv[this_mode][mbmi->ref_frame[1]].as_int;
1924 // restore src pointers
1925 mi_buf_restore(x, orig_src, orig_pre);
1928 bsi->rdstat[i][mode_idx].brate =
1929 set_and_cost_bmi_mvs(cpi, xd, i, this_mode, mode_mv[this_mode],
1930 frame_mv, seg_mvs[i], bsi->ref_mv,
1931 x->nmvjointcost, x->mvcost);
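// Propagate the chosen MVs to the other 4x4 blocks covered by this label:
// the block to the right (i + 1) when the partition is more than one 4x4
// unit wide, and the block below (i + 2) when it is more than one unit high.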
1933 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1934 bsi->rdstat[i][mode_idx].mvs[ref].as_int =
1935 mode_mv[this_mode][ref].as_int;
1936 if (num_4x4_blocks_wide > 1)
1937 bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
1938 mode_mv[this_mode][ref].as_int;
1939 if (num_4x4_blocks_high > 1)
1940 bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
1941 mode_mv[this_mode][ref].as_int;
1944 // Trap vectors that reach beyond the UMV borders
1945 if (mv_check_bounds(x, &mode_mv[this_mode][0].as_mv) ||
1947 mv_check_bounds(x, &mode_mv[this_mode][1].as_mv)))
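// When this is not the first interpolation filter tried (filter_idx > 0),
// check whether an earlier filter produced the same full-pel MVs for this
// block; if so its SEG_RDSTAT can be copied instead of re-encoding the
// segment.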
1950 if (filter_idx > 0) {
1951 BEST_SEG_INFO *ref_bsi = bsi_buf;
1955 for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1956 subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
1957 have_ref &= mode_mv[this_mode][ref].as_int ==
1958 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1961 if (filter_idx > 1 && !subpelmv && !have_ref) {
1962 ref_bsi = bsi_buf + 1;
1964 for (ref = 0; ref < 1 + has_second_rf; ++ref)
1965 have_ref &= mode_mv[this_mode][ref].as_int ==
1966 ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
1969 if (!subpelmv && have_ref &&
1970 ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
1971 vpx_memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
1972 sizeof(SEG_RDSTAT));
1973 if (num_4x4_blocks_wide > 1)
1974 bsi->rdstat[i + 1][mode_idx].eobs =
1975 ref_bsi->rdstat[i + 1][mode_idx].eobs;
1976 if (num_4x4_blocks_high > 1)
1977 bsi->rdstat[i + 2][mode_idx].eobs =
1978 ref_bsi->rdstat[i + 2][mode_idx].eobs;
1980 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
1981 mode_selected = this_mode;
1982 best_rd = bsi->rdstat[i][mode_idx].brdcost;
1988 bsi->rdstat[i][mode_idx].brdcost =
1989 encode_inter_mb_segment(cpi, x,
1990 bsi->segment_rd - this_segment_rd, i,
1991 &bsi->rdstat[i][mode_idx].byrate,
1992 &bsi->rdstat[i][mode_idx].bdist,
1993 &bsi->rdstat[i][mode_idx].bsse,
1994 bsi->rdstat[i][mode_idx].ta,
1995 bsi->rdstat[i][mode_idx].tl,
1997 if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
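// encode_inter_mb_segment() costs only the residual; add the rate-only rd
// cost of the mode and motion vectors (brate) and fold the coefficient rate
// (byrate) into the block's total rate.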
1998 bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv,
1999 bsi->rdstat[i][mode_idx].brate, 0);
2000 bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2001 bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2002 if (num_4x4_blocks_wide > 1)
2003 bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2004 if (num_4x4_blocks_high > 1)
2005 bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2008 if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2009 mode_selected = this_mode;
2010 best_rd = bsi->rdstat[i][mode_idx].brdcost;
2012 } /*for each 4x4 mode*/
2014 if (best_rd == INT64_MAX) {
2016 for (iy = i + 1; iy < 4; ++iy)
2017 for (midx = 0; midx < INTER_MODES; ++midx)
2018 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2019 bsi->segment_rd = INT64_MAX;
2023 mode_idx = INTER_OFFSET(mode_selected);
2024 vpx_memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2025 vpx_memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2027 set_and_cost_bmi_mvs(cpi, xd, i, mode_selected, mode_mv[mode_selected],
2028 frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2031 br += bsi->rdstat[i][mode_idx].brate;
2032 bd += bsi->rdstat[i][mode_idx].bdist;
2033 block_sse += bsi->rdstat[i][mode_idx].bsse;
2034 segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2035 this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
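// If the running rd total for this segmentation already exceeds the best
// segment rd seen so far, invalidate the remaining 4x4 labels and give up on
// this configuration.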
2037 if (this_segment_rd > bsi->segment_rd) {
2039 for (iy = i + 1; iy < 4; ++iy)
2040 for (midx = 0; midx < INTER_MODES; ++midx)
2041 bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2042 bsi->segment_rd = INT64_MAX;
2046 } /* for each label */
2050 bsi->segment_yrate = segmentyrate;
2051 bsi->segment_rd = this_segment_rd;
2052 bsi->sse = block_sse;
2054 // update the coding decisions
2055 for (k = 0; k < 4; ++k)
2056 bsi->modes[k] = mi->bmi[k].as_mode;
2058 if (bsi->segment_rd > best_rd)
2060 /* set it to the best */
2061 for (i = 0; i < 4; i++) {
2062 mode_idx = INTER_OFFSET(bsi->modes[i]);
2063 mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2064 if (has_second_ref(mbmi))
2065 mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2066 x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2067 mi->bmi[i].as_mode = bsi->modes[i];
2071 * used to set mbmi->mv.as_int
2073 *returntotrate = bsi->r;
2074 *returndistortion = bsi->d;
2075 *returnyrate = bsi->segment_yrate;
2076 *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2078 mbmi->mode = bsi->modes[3];
2080 return bsi->segment_rd;
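// For the given reference frame, pick the candidate reference MV whose
// integer-pel prediction has the lowest SAD against the source block, and
// record the winning index, the largest MV magnitude seen and the best SAD
// for later use by the motion search.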
2083 static void mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
2084 uint8_t *ref_y_buffer, int ref_y_stride,
2085 int ref_frame, BLOCK_SIZE block_size ) {
2086 MACROBLOCKD *xd = &x->e_mbd;
2087 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2092 int best_sad = INT_MAX;
2093 int this_sad = INT_MAX;
2096 uint8_t *src_y_ptr = x->plane[0].src.buf;
2098 int row_offset, col_offset;
2099 int num_mv_refs = MAX_MV_REF_CANDIDATES +
2100 (cpi->sf.adaptive_motion_search &&
2101 cpi->common.show_frame &&
2102 block_size < cpi->sf.max_partition_size);
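// pred_mv[] holds the candidate reference MVs plus the predictor from a
// previous motion search (x->pred_mv); the extra predictor is only examined
// when adaptive motion search applies to this frame and block size, as
// encoded in num_mv_refs above.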
2105 pred_mv[0] = mbmi->ref_mvs[ref_frame][0];
2106 pred_mv[1] = mbmi->ref_mvs[ref_frame][1];
2107 pred_mv[2] = x->pred_mv[ref_frame];
2109 // Get the sad for each candidate reference mv
2110 for (i = 0; i < num_mv_refs; i++) {
2111 this_mv.as_int = pred_mv[i].as_int;
2113 max_mv = MAX(max_mv,
2114 MAX(abs(this_mv.as_mv.row), abs(this_mv.as_mv.col)) >> 3);
2115 // only need to check zero mv once
2116 if (!this_mv.as_int && zero_seen)
2119 zero_seen = zero_seen || !this_mv.as_int;
2121 row_offset = this_mv.as_mv.row >> 3;
2122 col_offset = this_mv.as_mv.col >> 3;
2123 ref_y_ptr = ref_y_buffer + (ref_y_stride * row_offset) + col_offset;
2125 // Find sad for current vector.
2126 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
2127 ref_y_ptr, ref_y_stride,
2130 // Note if it is the best so far.
2131 if (this_sad < best_sad) {
2132 best_sad = this_sad;
2137 // Note the index of the mv that worked best in the reference list.
2138 x->mv_best_ref_index[ref_frame] = best_index;
2139 x->max_mv_context[ref_frame] = max_mv;
2140 x->pred_mv_sad[ref_frame] = best_sad;
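// Work out, for the current prediction context, the rate cost of signalling
// each reference frame for single and compound prediction, together with the
// probability used to code the compound/single selection. If the segment
// dictates the reference frame there is nothing to signal, so the costs are
// zeroed.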
2143 static void estimate_ref_frame_costs(const VP9_COMMON *cm,
2144 const MACROBLOCKD *xd,
2146 unsigned int *ref_costs_single,
2147 unsigned int *ref_costs_comp,
2148 vp9_prob *comp_mode_p) {
2149 int seg_ref_active = vp9_segfeature_active(&cm->seg, segment_id,
2151 if (seg_ref_active) {
2152 vpx_memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
2153 vpx_memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
2156 vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
2157 vp9_prob comp_inter_p = 128;
2159 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
2160 comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
2161 *comp_mode_p = comp_inter_p;
2166 ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);
2168 if (cm->reference_mode != COMPOUND_REFERENCE) {
2169 vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
2170 vp9_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
2171 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2173 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2174 base_cost += vp9_cost_bit(comp_inter_p, 0);
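// Single-reference coding tree: the first bit separates LAST from
// {GOLDEN, ALTREF}, the second bit then separates GOLDEN from ALTREF.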
2176 ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] =
2177 ref_costs_single[ALTREF_FRAME] = base_cost;
2178 ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0);
2179 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2180 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1);
2181 ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0);
2182 ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1);
2184 ref_costs_single[LAST_FRAME] = 512;
2185 ref_costs_single[GOLDEN_FRAME] = 512;
2186 ref_costs_single[ALTREF_FRAME] = 512;
2188 if (cm->reference_mode != SINGLE_REFERENCE) {
2189 vp9_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
2190 unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1);
2192 if (cm->reference_mode == REFERENCE_MODE_SELECT)
2193 base_cost += vp9_cost_bit(comp_inter_p, 1);
2195 ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0);
2196 ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1);
2198 ref_costs_comp[LAST_FRAME] = 512;
2199 ref_costs_comp[GOLDEN_FRAME] = 512;
2204 static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2207 int_mv *second_ref_mv,
2208 int64_t comp_pred_diff[REFERENCE_MODES],
2209 const int64_t tx_size_diff[TX_MODES],
2210 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]) {
2211 MACROBLOCKD *const xd = &x->e_mbd;
2213 // Take a snapshot of the coding context so it can be
2214 // restored if we decide to encode this way
2215 ctx->skip = x->skip;
2216 ctx->best_mode_index = mode_index;
2217 ctx->mic = *xd->mi[0];
2219 ctx->best_ref_mv[0].as_int = ref_mv->as_int;
2220 ctx->best_ref_mv[1].as_int = second_ref_mv->as_int;
2222 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
2223 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
2224 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
2226 vpx_memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
2227 vpx_memcpy(ctx->best_filter_diff, best_filter_diff,
2228 sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS);
2231 static void setup_pred_block(const MACROBLOCKD *xd,
2232 struct buf_2d dst[MAX_MB_PLANE],
2233 const YV12_BUFFER_CONFIG *src,
2234 int mi_row, int mi_col,
2235 const struct scale_factors *scale,
2236 const struct scale_factors *scale_uv) {
2239 dst[0].buf = src->y_buffer;
2240 dst[0].stride = src->y_stride;
2241 dst[1].buf = src->u_buffer;
2242 dst[2].buf = src->v_buffer;
2243 dst[1].stride = dst[2].stride = src->uv_stride;
2245 dst[3].buf = src->alpha_buffer;
2246 dst[3].stride = src->alpha_stride;
2249 // TODO(jkoleszar): Make scale factors per-plane data
2250 for (i = 0; i < MAX_MB_PLANE; i++) {
2251 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
2252 i ? scale_uv : scale,
2253 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
2257 void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
2258 const TileInfo *const tile,
2259 MV_REFERENCE_FRAME ref_frame,
2260 BLOCK_SIZE block_size,
2261 int mi_row, int mi_col,
2262 int_mv frame_nearest_mv[MAX_REF_FRAMES],
2263 int_mv frame_near_mv[MAX_REF_FRAMES],
2264 struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
2265 const VP9_COMMON *cm = &cpi->common;
2266 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
2267 MACROBLOCKD *const xd = &x->e_mbd;
2268 MODE_INFO *const mi = xd->mi[0];
2269 int_mv *const candidates = mi->mbmi.ref_mvs[ref_frame];
2270 const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf;
2272 // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
2273 // use the UV scaling factors.
2274 setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf);
2276 // Gets an initial list of candidate vectors from neighbours and orders them
2277 vp9_find_mv_refs(cm, xd, tile, mi, ref_frame, candidates, mi_row, mi_col);
2279 // Candidate refinement carried out at encoder and decoder
2280 vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
2281 &frame_nearest_mv[ref_frame],
2282 &frame_near_mv[ref_frame]);
2284 // Further refinement that is encode side only to test the top few candidates
2285 // in full and choose the best as the centre point for subsequent searches.
2286 // The current implementation doesn't support scaling.
2287 if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8)
2288 mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride,
2289 ref_frame, block_size);
2292 const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
2294 const VP9_COMMON *const cm = &cpi->common;
2295 const int ref_idx = cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)];
2296 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
2297 return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL;
2300 int vp9_get_switchable_rate(const VP9_COMP *cpi) {
2301 const MACROBLOCKD *const xd = &cpi->mb.e_mbd;
2302 const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
2303 const int ctx = vp9_get_pred_context_switchable_interp(xd);
2304 return SWITCHABLE_INTERP_RATE_FACTOR *
2305 cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
2308 static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2310 int mi_row, int mi_col,
2311 int_mv *tmp_mv, int *rate_mv) {
2312 MACROBLOCKD *xd = &x->e_mbd;
2313 const VP9_COMMON *cm = &cpi->common;
2314 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2315 struct buf_2d backup_yv12[MAX_MB_PLANE] = {{0}};
2316 int bestsme = INT_MAX;
2318 int sadpb = x->sadperbit16;
2320 int ref = mbmi->ref_frame[0];
2321 MV ref_mv = mbmi->ref_mvs[ref][0].as_mv;
2323 int tmp_col_min = x->mv_col_min;
2324 int tmp_col_max = x->mv_col_max;
2325 int tmp_row_min = x->mv_row_min;
2326 int tmp_row_max = x->mv_row_max;
2328 const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi,
2332 pred_mv[0] = mbmi->ref_mvs[ref][0].as_mv;
2333 pred_mv[1] = mbmi->ref_mvs[ref][1].as_mv;
2334 pred_mv[2] = x->pred_mv[ref].as_mv;
2336 if (scaled_ref_frame) {
2338 // Swap out the reference frame for a version that's been scaled to
2339 // match the resolution of the current frame, allowing the existing
2340 // motion search code to be used without additional modifications.
2341 for (i = 0; i < MAX_MB_PLANE; i++)
2342 backup_yv12[i] = xd->plane[i].pre[0];
2344 vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
2347 vp9_set_mv_search_range(x, &ref_mv);
2349 // Work out the size of the first step in the mv step search.
2350 // 0 here is maximum length first step. 1 is MAX >> 1 etc.
2351 if (cpi->sf.auto_mv_step_size && cm->show_frame) {
2352 // Take a weighted average of the step_params based on the last frame's
2353 // max mv magnitude and that based on the best ref mvs of the current
2354 // block for the given reference.
2355 step_param = (vp9_init_search_range(&cpi->sf, x->max_mv_context[ref]) +
2356 cpi->mv_step_param) / 2;
2358 step_param = cpi->mv_step_param;
2361 if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64 &&
2363 int boffset = 2 * (b_width_log2(BLOCK_64X64) - MIN(b_height_log2(bsize),
2364 b_width_log2(bsize)));
2365 step_param = MAX(step_param, boffset);
2368 if (cpi->sf.adaptive_motion_search) {
2369 int bwl = b_width_log2_lookup[bsize];
2370 int bhl = b_height_log2_lookup[bsize];
2372 int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);
2377 for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
2378 if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
2379 x->pred_mv[ref].as_int = 0;
2380 tmp_mv->as_int = INVALID_MV;
2382 if (scaled_ref_frame) {
2384 for (i = 0; i < MAX_MB_PLANE; i++)
2385 xd->plane[i].pre[0] = backup_yv12[i];
2392 mvp_full = pred_mv[x->mv_best_ref_index[ref]];
2397 bestsme = full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
2398 &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
2400 x->mv_col_min = tmp_col_min;
2401 x->mv_col_max = tmp_col_max;
2402 x->mv_row_min = tmp_row_min;
2403 x->mv_row_max = tmp_row_max;
2405 if (bestsme < INT_MAX) {
2406 int dis; /* TODO: use dis in distortion calculation later. */
2407 cpi->find_fractional_mv_step(x, &tmp_mv->as_mv, &ref_mv,
2408 cm->allow_high_precision_mv,
2410 &cpi->fn_ptr[bsize],
2411 cpi->sf.subpel_force_stop,
2412 cpi->sf.subpel_iters_per_step,
2413 x->nmvjointcost, x->mvcost,
2414 &dis, &x->pred_sse[ref]);
2416 *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv,
2417 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2419 if (cpi->sf.adaptive_motion_search && cm->show_frame)
2420 x->pred_mv[ref].as_int = tmp_mv->as_int;
2422 if (scaled_ref_frame) {
2424 for (i = 0; i < MAX_MB_PLANE; i++)
2425 xd->plane[i].pre[0] = backup_yv12[i];
2429 static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
2432 int mi_row, int mi_col,
2433 int_mv single_newmv[MAX_REF_FRAMES],
2435 const int pw = 4 * num_4x4_blocks_wide_lookup[bsize];
2436 const int ph = 4 * num_4x4_blocks_high_lookup[bsize];
2437 MACROBLOCKD *xd = &x->e_mbd;
2438 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2439 const int refs[2] = { mbmi->ref_frame[0],
2440 mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
2443 // Prediction buffer from second frame.
2444 uint8_t *second_pred = vpx_memalign(16, pw * ph * sizeof(uint8_t));
2445 const InterpKernel *kernel = vp9_get_interp_kernel(mbmi->interp_filter);
2447 // Do joint motion search in compound mode to get more accurate mv.
2448 struct buf_2d backup_yv12[2][MAX_MB_PLANE];
2449 struct buf_2d scaled_first_yv12 = xd->plane[0].pre[0];
2450 int last_besterr[2] = {INT_MAX, INT_MAX};
2451 const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
2452 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]),
2453 vp9_get_scaled_ref_frame(cpi, mbmi->ref_frame[1])
2456 for (ref = 0; ref < 2; ++ref) {
2457 ref_mv[ref] = mbmi->ref_mvs[refs[ref]][0];
2459 if (scaled_ref_frame[ref]) {
2461 // Swap out the reference frame for a version that's been scaled to
2462 // match the resolution of the current frame, allowing the existing
2463 // motion search code to be used without additional modifications.
2464 for (i = 0; i < MAX_MB_PLANE; i++)
2465 backup_yv12[ref][i] = xd->plane[i].pre[ref];
2466 vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
2470 frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int;
2473 // Allow the joint search to run several iterations for each ref frame
2474 // and break out of the search loop if it can't find a better mv.
2475 for (ite = 0; ite < 4; ite++) {
2476 struct buf_2d ref_yv12[2];
2477 int bestsme = INT_MAX;
2478 int sadpb = x->sadperbit16;
2480 int search_range = 3;
2482 int tmp_col_min = x->mv_col_min;
2483 int tmp_col_max = x->mv_col_max;
2484 int tmp_row_min = x->mv_row_min;
2485 int tmp_row_max = x->mv_row_max;
2488 // Initialized here because of compiler problem in Visual Studio.
2489 ref_yv12[0] = xd->plane[0].pre[0];
2490 ref_yv12[1] = xd->plane[0].pre[1];
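// Build the prediction from the other (currently fixed) reference so that
// refining this reference's MV below minimises the error of the averaged,
// compound prediction rather than of a single prediction.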
2492 // Get pred block from second frame.
2493 vp9_build_inter_predictor(ref_yv12[!id].buf,
2494 ref_yv12[!id].stride,
2496 &frame_mv[refs[!id]].as_mv,
2497 &xd->block_refs[!id]->sf,
2499 kernel, MV_PRECISION_Q3,
2500 mi_col * MI_SIZE, mi_row * MI_SIZE);
2502 // Compound motion search on first ref frame.
2504 xd->plane[0].pre[0] = ref_yv12[id];
2505 vp9_set_mv_search_range(x, &ref_mv[id].as_mv);
2507 // Use mv result from single mode as mvp.
2508 tmp_mv = frame_mv[refs[id]].as_mv;
2513 // Small-range full-pixel motion search
2514 bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb,
2516 &cpi->fn_ptr[bsize],
2517 &ref_mv[id].as_mv, second_pred,
2519 if (bestsme < INT_MAX)
2520 bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv,
2521 second_pred, &cpi->fn_ptr[bsize], 1);
2523 x->mv_col_min = tmp_col_min;
2524 x->mv_col_max = tmp_col_max;
2525 x->mv_row_min = tmp_row_min;
2526 x->mv_row_max = tmp_row_max;
2528 if (bestsme < INT_MAX) {
2529 int dis; /* TODO: use dis in distortion calculation later. */
2531 bestsme = cpi->find_fractional_mv_step_comp(
2534 cpi->common.allow_high_precision_mv,
2536 &cpi->fn_ptr[bsize],
2537 0, cpi->sf.subpel_iters_per_step,
2538 x->nmvjointcost, x->mvcost,
2539 &dis, &sse, second_pred,
2544 xd->plane[0].pre[0] = scaled_first_yv12;
2546 if (bestsme < last_besterr[id]) {
2547 frame_mv[refs[id]].as_mv = tmp_mv;
2548 last_besterr[id] = bestsme;
2556 for (ref = 0; ref < 2; ++ref) {
2557 if (scaled_ref_frame[ref]) {
2558 // restore the predictor
2560 for (i = 0; i < MAX_MB_PLANE; i++)
2561 xd->plane[i].pre[ref] = backup_yv12[ref][i];
2564 *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
2565 &mbmi->ref_mvs[refs[ref]][0].as_mv,
2566 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2569 vpx_free(second_pred);
2572 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2573 uint8_t *orig_dst[MAX_MB_PLANE],
2574 int orig_dst_stride[MAX_MB_PLANE]) {
2576 for (i = 0; i < MAX_MB_PLANE; i++) {
2577 xd->plane[i].dst.buf = orig_dst[i];
2578 xd->plane[i].dst.stride = orig_dst_stride[i];
2582 static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
2584 int64_t txfm_cache[],
2585 int *rate2, int64_t *distortion,
2587 int *rate_y, int64_t *distortion_y,
2588 int *rate_uv, int64_t *distortion_uv,
2589 int *mode_excluded, int *disable_skip,
2590 INTERP_FILTER *best_filter,
2591 int_mv (*mode_mv)[MAX_REF_FRAMES],
2592 int mi_row, int mi_col,
2593 int_mv single_newmv[MAX_REF_FRAMES],
2595 const int64_t ref_best_rd) {
2596 VP9_COMMON *cm = &cpi->common;
2597 RD_OPT *rd_opt = &cpi->rd;
2598 MACROBLOCKD *xd = &x->e_mbd;
2599 MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
2600 const int is_comp_pred = has_second_ref(mbmi);
2601 const int num_refs = is_comp_pred ? 2 : 1;
2602 const int this_mode = mbmi->mode;
2603 int_mv *frame_mv = mode_mv[this_mode];
2605 int refs[2] = { mbmi->ref_frame[0],
2606 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2608 int64_t this_rd = 0;
2609 DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64);
2610 int pred_exists = 0;
2612 int64_t rd, best_rd = INT64_MAX;
2613 int best_needs_copy = 0;
2614 uint8_t *orig_dst[MAX_MB_PLANE];
2615 int orig_dst_stride[MAX_MB_PLANE];
2619 if (frame_mv[refs[0]].as_int == INVALID_MV ||
2620 frame_mv[refs[1]].as_int == INVALID_MV)
2624 if (this_mode == NEWMV) {
2627 // Initialize mv using single prediction mode result.
2628 frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
2629 frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;
2631 if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
2632 joint_motion_search(cpi, x, bsize, frame_mv,
2633 mi_row, mi_col, single_newmv, &rate_mv);
2635 rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
2636 &mbmi->ref_mvs[refs[0]][0].as_mv,
2637 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2638 rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
2639 &mbmi->ref_mvs[refs[1]][0].as_mv,
2640 x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
2645 single_motion_search(cpi, x, bsize, mi_row, mi_col,
2647 if (tmp_mv.as_int == INVALID_MV)
2650 frame_mv[refs[0]].as_int =
2651 xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int;
2652 single_newmv[refs[0]].as_int = tmp_mv.as_int;
2656 for (i = 0; i < num_refs; ++i) {
2657 cur_mv[i] = frame_mv[refs[i]];
2658 // Clip "next_nearest" so that it does not extend too far out of the image.
2659 if (this_mode != NEWMV)
2660 clamp_mv2(&cur_mv[i].as_mv, xd);
2662 if (mv_check_bounds(x, &cur_mv[i].as_mv))
2664 mbmi->mv[i].as_int = cur_mv[i].as_int;
2667 // do first prediction into the destination buffer. Do the next
2668 // prediction into a temporary buffer. Then keep track of which one
2669 // of these currently holds the best predictor, and use the other
2670 // one for future predictions. In the end, copy from tmp_buf to
2671 // dst if necessary.
2672 for (i = 0; i < MAX_MB_PLANE; i++) {
2673 orig_dst[i] = xd->plane[i].dst.buf;
2674 orig_dst_stride[i] = xd->plane[i].dst.stride;
2677 /* We don't include the cost of the second reference here, because there
2678 * are only three options: Last/Golden, ARF/Last or Golden/ARF, or in other
2679 * words if you present them in that order, the second one is always known
2680 * if the first is known */
2681 *rate2 += cost_mv_ref(cpi, this_mode, mbmi->mode_context[refs[0]]);
2683 if (!(*mode_excluded))
2684 *mode_excluded = is_comp_pred ? cm->reference_mode == SINGLE_REFERENCE
2685 : cm->reference_mode == COMPOUND_REFERENCE;
2688 // Are all MVs integer pel for Y and UV
2689 intpel_mv = !mv_has_subpel(&mbmi->mv[0].as_mv);
2691 intpel_mv &= !mv_has_subpel(&mbmi->mv[1].as_mv);
2693 // Search for best switchable filter by checking the variance of
2694 // pred error irrespective of whether the filter will be used
2695 rd_opt->mask_filter = 0;
2696 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
2697 rd_opt->filter_cache[i] = INT64_MAX;
2699 if (cm->interp_filter != BILINEAR) {
2700 *best_filter = EIGHTTAP;
2701 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
2702 *best_filter = EIGHTTAP;
2705 int tmp_rate_sum = 0;
2706 int64_t tmp_dist_sum = 0;
2708 for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
2711 mbmi->interp_filter = i;
2712 rs = vp9_get_switchable_rate(cpi);
2713 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
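// With purely integer-pel MVs every interpolation filter yields the same
// prediction, so after the first filter the measured rate and distortion
// (tmp_rate_sum / tmp_dist_sum) are simply reused.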
2715 if (i > 0 && intpel_mv) {
2716 rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
2717 rd_opt->filter_cache[i] = rd;
2718 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2719 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2720 if (cm->interp_filter == SWITCHABLE)
2722 rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2725 int64_t dist_sum = 0;
2726 if ((cm->interp_filter == SWITCHABLE &&
2727 (!i || best_needs_copy)) ||
2728 (cm->interp_filter != SWITCHABLE &&
2729 (cm->interp_filter == mbmi->interp_filter ||
2730 (i == 0 && intpel_mv)))) {
2731 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2733 for (j = 0; j < MAX_MB_PLANE; j++) {
2734 xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
2735 xd->plane[j].dst.stride = 64;
2738 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2739 model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum);
2741 rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
2742 rd_opt->filter_cache[i] = rd;
2743 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
2744 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
2745 if (cm->interp_filter == SWITCHABLE)
2747 rd_opt->mask_filter = MAX(rd_opt->mask_filter, rd);
2749 if (i == 0 && intpel_mv) {
2750 tmp_rate_sum = rate_sum;
2751 tmp_dist_sum = dist_sum;
2755 if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2756 if (rd / 2 > ref_best_rd) {
2757 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2761 newbest = i == 0 || rd < best_rd;
2765 *best_filter = mbmi->interp_filter;
2766 if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
2767 best_needs_copy = !best_needs_copy;
2770 if ((cm->interp_filter == SWITCHABLE && newbest) ||
2771 (cm->interp_filter != SWITCHABLE &&
2772 cm->interp_filter == mbmi->interp_filter)) {
2776 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2779 // Set the appropriate filter
2780 mbmi->interp_filter = cm->interp_filter != SWITCHABLE ?
2781 cm->interp_filter : *best_filter;
2782 rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0;
2785 if (best_needs_copy) {
2786 // again temporarily set the buffers to local memory to prevent a memcpy
2787 for (i = 0; i < MAX_MB_PLANE; i++) {
2788 xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
2789 xd->plane[i].dst.stride = 64;
2793 // Handles the special case when a filter that is not in the
2794 // switchable list (e.g. bilinear, 6-tap) is indicated at the frame level.
2795 vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
2798 if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
2801 model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist);
2802 rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
2803 // if current pred_error modeled rd is substantially more than the best
2804 // so far, do not bother doing full rd
2805 if (rd / 2 > ref_best_rd) {
2806 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2811 if (cm->interp_filter == SWITCHABLE)
2812 *rate2 += vp9_get_switchable_rate(cpi);
2814 if (!is_comp_pred) {
2815 if (!x->in_active_map) {
2820 } else if (cpi->allow_encode_breakout && x->encode_breakout) {
2821 const BLOCK_SIZE y_size = get_plane_block_size(bsize, &xd->plane[0]);
2822 const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]);
2823 unsigned int var, sse;
2824 // Skipping threshold for ac.
2825 unsigned int thresh_ac;
2826 // Set a maximum for the threshold to avoid a big PSNR loss in low bit-rate
2827 // cases. Use an extremely low threshold for static frames to limit skipping.
2828 const unsigned int max_thresh = (cpi->allow_encode_breakout ==
2829 ENCODE_BREAKOUT_LIMITED) ? 128 : 36000;
2830 // The encode_breakout input
2831 const unsigned int min_thresh =
2832 MIN(((unsigned int)x->encode_breakout << 4), max_thresh);
2834 // Calculate threshold according to dequant value.
2835 thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) / 9;
2836 thresh_ac = clamp(thresh_ac, min_thresh, max_thresh);
2838 var = cpi->fn_ptr[y_size].vf(x->plane[0].src.buf, x->plane[0].src.stride,
2839 xd->plane[0].dst.buf,
2840 xd->plane[0].dst.stride, &sse);
2842 // Adjust threshold according to partition size.
2843 thresh_ac >>= 8 - (b_width_log2_lookup[bsize] +
2844 b_height_log2_lookup[bsize]);
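// Scale the threshold down for smaller blocks (larger right shift) since the
// sse below is accumulated over every pixel in the block.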
2846 // Y skipping condition checking
2847 if (sse < thresh_ac || sse == 0) {
2848 // Skipping threshold for dc
2849 unsigned int thresh_dc;
2851 thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6);
2853 // dc skipping condition checking
2854 if ((sse - var) < thresh_dc || sse == var) {
2855 unsigned int sse_u, sse_v;
2856 unsigned int var_u, var_v;
2858 var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf,
2859 x->plane[1].src.stride,
2860 xd->plane[1].dst.buf,
2861 xd->plane[1].dst.stride, &sse_u);
2863 // U skipping condition checking
2864 if ((sse_u * 4 < thresh_ac || sse_u == 0) &&
2865 (sse_u - var_u < thresh_dc || sse_u == var_u)) {
2866 var_v = cpi->fn_ptr[uv_size].vf(x->plane[2].src.buf,
2867 x->plane[2].src.stride,
2868 xd->plane[2].dst.buf,
2869 xd->plane[2].dst.stride, &sse_v);
2871 // V skipping condition checking
2872 if ((sse_v * 4 < thresh_ac || sse_v == 0) &&
2873 (sse_v - var_v < thresh_dc || sse_v == var_v)) {
2876 // The cost of skip bit needs to be added.
2877 *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2879 // Scaling factor for SSE from spatial domain to frequency domain
2880 // is 16. Adjust distortion accordingly.
2881 *distortion_uv = (sse_u + sse_v) << 4;
2882 *distortion = (sse << 4) + *distortion_uv;
2885 this_rd = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2894 int skippable_y, skippable_uv;
2895 int64_t sseuv = INT64_MAX;
2896 int64_t rdcosty = INT64_MAX;
2898 // Y cost and distortion
2899 inter_super_block_yrd(cpi, x, rate_y, distortion_y, &skippable_y, psse,
2900 bsize, txfm_cache, ref_best_rd);
2902 if (*rate_y == INT_MAX) {
2904 *distortion = INT64_MAX;
2905 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2910 *distortion += *distortion_y;
2912 rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
2913 rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));
2915 super_block_uvrd(cpi, x, rate_uv, distortion_uv, &skippable_uv, &sseuv,
2916 bsize, ref_best_rd - rdcosty);
2917 if (*rate_uv == INT_MAX) {
2919 *distortion = INT64_MAX;
2920 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2926 *distortion += *distortion_uv;
2927 *skippable = skippable_y && skippable_uv;
2930 restore_dst_buf(xd, orig_dst, orig_dst_stride);
2931 return this_rd; // if 0, this will be re-calculated by caller
2934 static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
2936 struct macroblock_plane *const p = x->plane;
2937 struct macroblockd_plane *const pd = x->e_mbd.plane;
2940 for (i = 0; i < max_plane; ++i) {
2941 p[i].coeff = ctx->coeff_pbuf[i][1];
2942 p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
2943 pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
2944 p[i].eobs = ctx->eobs_pbuf[i][1];
2946 ctx->coeff_pbuf[i][1] = ctx->coeff_pbuf[i][0];
2947 ctx->qcoeff_pbuf[i][1] = ctx->qcoeff_pbuf[i][0];
2948 ctx->dqcoeff_pbuf[i][1] = ctx->dqcoeff_pbuf[i][0];
2949 ctx->eobs_pbuf[i][1] = ctx->eobs_pbuf[i][0];
2951 ctx->coeff_pbuf[i][0] = p[i].coeff;
2952 ctx->qcoeff_pbuf[i][0] = p[i].qcoeff;
2953 ctx->dqcoeff_pbuf[i][0] = pd[i].dqcoeff;
2954 ctx->eobs_pbuf[i][0] = p[i].eobs;
2958 void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
2959 int *returnrate, int64_t *returndist,
2961 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
2962 VP9_COMMON *const cm = &cpi->common;
2963 MACROBLOCKD *const xd = &x->e_mbd;
2964 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
2965 int y_skip = 0, uv_skip = 0;
2966 int64_t dist_y = 0, dist_uv = 0, tx_cache[TX_MODES] = { 0 };
2967 TX_SIZE max_uv_tx_size;
2970 xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME;
2972 if (bsize >= BLOCK_8X8) {
2973 if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2974 &dist_y, &y_skip, bsize, tx_cache,
2975 best_rd) >= best_rd) {
2976 *returnrate = INT_MAX;
2979 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
2980 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2981 &dist_uv, &uv_skip, bsize, max_uv_tx_size);
2984 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
2985 &dist_y, best_rd) >= best_rd) {
2986 *returnrate = INT_MAX;
2989 max_uv_tx_size = get_uv_tx_size_impl(xd->mi[0]->mbmi.tx_size, bsize);
2990 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly,
2991 &dist_uv, &uv_skip, BLOCK_8X8, max_uv_tx_size);
2994 if (y_skip && uv_skip) {
2995 *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
2996 vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
2997 *returndist = dist_y + dist_uv;
2998 vp9_zero(ctx->tx_rd_diff);
3001 *returnrate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3002 *returndist = dist_y + dist_uv;
3003 if (cpi->sf.tx_size_search_method == USE_FULL_RD)
3004 for (i = 0; i < TX_MODES; i++) {
3005 if (tx_cache[i] < INT64_MAX && tx_cache[cm->tx_mode] < INT64_MAX)
3006 ctx->tx_rd_diff[i] = tx_cache[i] - tx_cache[cm->tx_mode];
3008 ctx->tx_rd_diff[i] = 0;
3012 ctx->mic = *xd->mi[0];
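// A mode is skipped when its rd threshold is disabled (INT_MAX) or when the
// best rd found so far is already below the threshold scaled by the adaptive
// per-mode frequency factor (hence the >> 5 fixed-point shift).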
3015 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
3017 return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
3020 // Updating rd_thresh_freq_fact[] here means that the different
3021 // partition/block sizes are handled independently based on the best
3022 // choice for the current partition. It may well be better to keep a scaled
3023 // best rd so far value and update rd_thresh_freq_fact based on the mode/size
3024 // combination that wins out.
3025 static void update_rd_thresh_fact(VP9_COMP *cpi, int bsize,
3026 int best_mode_index) {
3027 if (cpi->sf.adaptive_rd_thresh > 0) {
3028 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
3030 for (mode = 0; mode < top_mode; ++mode) {
3031 int *const fact = &cpi->rd.thresh_freq_fact[bsize][mode];
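// The mode that won gets its frequency factor reduced by one eighth so it is
// tried more readily next time; every other mode's factor grows by
// RD_THRESH_INC, capped at adaptive_rd_thresh * RD_THRESH_MAX_FACT.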
3033 if (mode == best_mode_index) {
3034 *fact -= (*fact >> 3);
3036 *fact = MIN(*fact + RD_THRESH_INC,
3037 cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT);
3043 int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
3044 const TileInfo *const tile,
3045 int mi_row, int mi_col,
3047 int64_t *returndistortion,
3049 PICK_MODE_CONTEXT *ctx,
3050 int64_t best_rd_so_far) {
3051 VP9_COMMON *const cm = &cpi->common;
3052 RD_OPT *const rd_opt = &cpi->rd;
3053 MACROBLOCKD *const xd = &x->e_mbd;
3054 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3055 const struct segmentation *const seg = &cm->seg;
3056 PREDICTION_MODE this_mode;
3057 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3058 unsigned char segment_id = mbmi->segment_id;
3060 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3061 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3062 int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3063 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3065 int64_t best_rd = best_rd_so_far;
3066 int64_t best_tx_rd[TX_MODES];
3067 int64_t best_tx_diff[TX_MODES];
3068 int64_t best_pred_diff[REFERENCE_MODES];
3069 int64_t best_pred_rd[REFERENCE_MODES];
3070 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3071 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3072 MB_MODE_INFO best_mbmode = { 0 };
3073 int mode_index, best_mode_index = -1;
3074 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3075 vp9_prob comp_mode_p;
3076 int64_t best_intra_rd = INT64_MAX;
3077 int64_t best_inter_rd = INT64_MAX;
3078 PREDICTION_MODE best_intra_mode = DC_PRED;
3079 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3080 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3081 int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3082 int64_t dist_uv[TX_SIZES];
3083 int skip_uv[TX_SIZES];
3084 PREDICTION_MODE mode_uv[TX_SIZES];
3085 int64_t mode_distortions[MB_MODE_COUNT] = {-1};
3086 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
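// Rate penalty applied below to intra modes other than DC_PRED and TM_PRED;
// it scales with the DC quantizer so intra is penalised more at higher Q.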
3087 const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
3088 const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
3090 int mode_skip_mask = 0;
3091 int mode_skip_start = cpi->sf.mode_skip_start + 1;
3092 const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3093 const int *const rd_thresh_freq_fact = rd_opt->thresh_freq_fact[bsize];
3094 const int mode_search_skip_flags = cpi->sf.mode_search_skip_flags;
3095 const int intra_y_mode_mask =
3096 cpi->sf.intra_y_mode_mask[max_txsize_lookup[bsize]];
3097 int disable_inter_mode_mask = cpi->sf.disable_inter_mode_mask[bsize];
3099 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3101 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3104 for (i = 0; i < REFERENCE_MODES; ++i)
3105 best_pred_rd[i] = INT64_MAX;
3106 for (i = 0; i < TX_MODES; i++)
3107 best_tx_rd[i] = INT64_MAX;
3108 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3109 best_filter_rd[i] = INT64_MAX;
3110 for (i = 0; i < TX_SIZES; i++)
3111 rate_uv_intra[i] = INT_MAX;
3112 for (i = 0; i < MAX_REF_FRAMES; ++i)
3113 x->pred_sse[i] = INT_MAX;
3115 *returnrate = INT_MAX;
3117 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3118 x->pred_mv_sad[ref_frame] = INT_MAX;
3119 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3120 vp9_setup_buffer_inter(cpi, x, tile,
3121 ref_frame, bsize, mi_row, mi_col,
3122 frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3124 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3125 frame_mv[ZEROMV][ref_frame].as_int = 0;
3128 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3129 // All modes from vp9_mode_order that use this frame as any ref
3130 static const int ref_frame_mask_all[] = {
3131 0x0, 0x123291, 0x25c444, 0x39b722
3133 // Fixed mv modes (NEARESTMV, NEARMV, ZEROMV) from vp9_mode_order that use
3134 // this frame as their primary ref
3135 static const int ref_frame_mask_fixedmv[] = {
3136 0x0, 0x121281, 0x24c404, 0x080102
3138 if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3139 // Skip modes for missing references
3140 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3141 } else if (cpi->sf.reference_masking) {
3142 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3143 // Skip fixed mv modes for poor references
3144 if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3145 mode_skip_mask |= ref_frame_mask_fixedmv[ref_frame];
3150 // If the segment reference frame feature is enabled, then do nothing if
3151 // the current ref frame is not allowed.
3152 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3153 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3154 mode_skip_mask |= ref_frame_mask_all[ref_frame];
3158 // If the segment skip feature is enabled, then do nothing if the current
3159 // mode is not allowed.
3160 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) {
3161 const int inter_non_zero_mode_mask = 0x1F7F7;
3162 mode_skip_mask |= inter_non_zero_mode_mask;
3165 // Disable this drop out case if the ref frame
3166 // segment level feature is enabled for this segment. This is to
3167 // prevent the possibility that we end up unable to pick any mode.
3168 if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3169 // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3170 // unless ARNR filtering is enabled in which case we want
3171 // an unfiltered alternative. We allow near/nearest as well
3172 // because they may result in zero-zero MVs but be cheaper.
3173 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3175 ~((1 << THR_NEARESTA) | (1 << THR_NEARA) | (1 << THR_ZEROA));
3176 if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3177 mode_skip_mask |= (1 << THR_NEARA);
3178 if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3179 mode_skip_mask |= (1 << THR_NEARESTA);
3183 // TODO(JBB): This is to make up for the fact that we don't have SAD
3184 // functions that work when the block size reads outside the UMV. We
3185 // should fix this either by making the motion search just work on
3186 // a representative block in the boundary (first) and then implementing a
3187 // function that does SADs when inside the border.
3188 if ((mi_row + bhs) > cm->mi_rows || (mi_col + bws) > cm->mi_cols) {
3189 const int new_modes_mask =
3190 (1 << THR_NEWMV) | (1 << THR_NEWG) | (1 << THR_NEWA) |
3191 (1 << THR_COMP_NEWLA) | (1 << THR_COMP_NEWGA);
3192 mode_skip_mask |= new_modes_mask;
3195 if (bsize > cpi->sf.max_intra_bsize) {
3196 mode_skip_mask |= 0xFF30808;
3199 if (!x->in_active_map) {
3201 assert(cpi->ref_frame_flags & VP9_LAST_FLAG);
3202 if (frame_mv[NEARESTMV][LAST_FRAME].as_int == 0)
3203 mode_index = THR_NEARESTMV;
3204 else if (frame_mv[NEARMV][LAST_FRAME].as_int == 0)
3205 mode_index = THR_NEARMV;
3207 mode_index = THR_ZEROMV;
3208 mode_skip_mask = ~(1 << mode_index);
3209 mode_skip_start = MAX_MODES;
3210 disable_inter_mode_mask = 0;
3213 for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) {
3214 int mode_excluded = 0;
3215 int64_t this_rd = INT64_MAX;
3216 int disable_skip = 0;
3217 int compmode_cost = 0;
3218 int rate2 = 0, rate_y = 0, rate_uv = 0;
3219 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3221 int64_t tx_cache[TX_MODES];
3224 int64_t total_sse = INT64_MAX;
3227 // Look at the reference frame of the best mode so far and set the
3228 // skip mask to look at a subset of the remaining modes.
3229 if (mode_index == mode_skip_start && best_mode_index >= 0) {
3230 switch (vp9_mode_order[best_mode_index].ref_frame[0]) {
3234 mode_skip_mask |= LAST_FRAME_MODE_MASK;
3237 mode_skip_mask |= GOLDEN_FRAME_MODE_MASK;
3240 mode_skip_mask |= ALT_REF_MODE_MASK;
3243 case MAX_REF_FRAMES:
3244 assert(0 && "Invalid Reference frame");
3247 if (mode_skip_mask & (1 << mode_index))
3250 // Test best rd so far against threshold for trying this mode.
3251 if (rd_less_than_thresh(best_rd, rd_threshes[mode_index],
3252 rd_thresh_freq_fact[mode_index]))
3255 this_mode = vp9_mode_order[mode_index].mode;
3256 ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3257 if (ref_frame != INTRA_FRAME &&
3258 disable_inter_mode_mask & (1 << INTER_OFFSET(this_mode)))
3260 second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3262 comp_pred = second_ref_frame > INTRA_FRAME;
3264 if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3265 best_mode_index >=0 &&
3266 vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME)
3268 if ((mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3269 ref_frame != best_inter_ref_frame &&
3270 second_ref_frame != best_inter_ref_frame)
3272 mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3274 if (ref_frame != INTRA_FRAME)
3275 mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3278 if (ref_frame == INTRA_FRAME) {
3279 if (!(intra_y_mode_mask & (1 << this_mode)))
3281 if (this_mode != DC_PRED) {
3282 // Disable intra modes other than DC_PRED for blocks with low variance
3283 // Threshold for intra skipping based on source variance
3284 // TODO(debargha): Specialize the threshold for super block sizes
3285 const unsigned int skip_intra_var_thresh = 64;
3286 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3287 x->source_variance < skip_intra_var_thresh)
3289 // Only search the oblique modes if the best so far is
3290 // one of the neighboring directional modes
3291 if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3292 (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3293 if (best_mode_index >= 0 &&
3294 vp9_mode_order[best_mode_index].ref_frame[0] > INTRA_FRAME)
3297 if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3298 if (conditional_skipintra(this_mode, best_intra_mode))
3303 if (x->in_active_map &&
3304 !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
3305 const MV_REFERENCE_FRAME ref_frames[2] = {ref_frame, second_ref_frame};
3306 if (!check_best_zero_mv(cpi, mbmi->mode_context, frame_mv,
3307 disable_inter_mode_mask, this_mode, ref_frames))
3312 mbmi->mode = this_mode;
3313 mbmi->uv_mode = x->in_active_map ? DC_PRED : this_mode;
3314 mbmi->ref_frame[0] = ref_frame;
3315 mbmi->ref_frame[1] = second_ref_frame;
3316 // Evaluate all sub-pel filters irrespective of whether we can use
3317 // them for this frame.
3318 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3319 : cm->interp_filter;
3321 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3323 // Select prediction reference frames.
3324 for (i = 0; i < MAX_MB_PLANE; i++) {
3325 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3327 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3330 for (i = 0; i < TX_MODES; ++i)
3331 tx_cache[i] = INT64_MAX;
3333 if (ref_frame == INTRA_FRAME) {
3335 intra_super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL,
3336 bsize, tx_cache, best_rd);
3338 if (rate_y == INT_MAX)
3341 uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize);
3342 if (rate_uv_intra[uv_tx] == INT_MAX) {
3343 choose_intra_uv_mode(cpi, ctx, bsize, uv_tx,
3344 &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx],
3345 &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]);
3348 rate_uv = rate_uv_tokenonly[uv_tx];
3349 distortion_uv = dist_uv[uv_tx];
3350 skippable = skippable && skip_uv[uv_tx];
3351 mbmi->uv_mode = mode_uv[uv_tx];
3353 rate2 = rate_y + cpi->mbmode_cost[mbmi->mode] + rate_uv_intra[uv_tx];
3354 if (this_mode != DC_PRED && this_mode != TM_PRED)
3355 rate2 += intra_cost_penalty;
3356 distortion2 = distortion_y + distortion_uv;
3358 this_rd = handle_inter_mode(cpi, x, bsize,
3360 &rate2, &distortion2, &skippable,
3361 &rate_y, &distortion_y,
3362 &rate_uv, &distortion_uv,
3363 &mode_excluded, &disable_skip,
3364 &tmp_best_filter, frame_mv,
3366 single_newmv, &total_sse, best_rd);
3367 if (this_rd == INT64_MAX)
3370 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3372 if (cm->reference_mode == REFERENCE_MODE_SELECT)
3373 rate2 += compmode_cost;
3376 // Estimate the reference frame signaling cost and add it
3377 // to the rolling cost variable.
3379 rate2 += ref_costs_comp[ref_frame];
3381 rate2 += ref_costs_single[ref_frame];
3384 if (!disable_skip) {
3385 // Test for the condition where skip block will be activated
3386 // because there are no non-zero coefficients and make any
3387 // necessary adjustment for rate. Ignore if skip is coded at
3388 // segment level as the cost won't have been added in.
3389 // Is Mb level skip allowed (i.e. not coded at segment level).
3390 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
3394 // Back out the coefficient coding costs
3395 rate2 -= (rate_y + rate_uv);
3396 // for best yrd calculation
3399 if (mb_skip_allowed) {
3402 // Cost the skip mb case
3403 vp9_prob skip_prob = vp9_get_skip_prob(cm, xd);
3405 prob_skip_cost = vp9_cost_bit(skip_prob, 1);
3406 rate2 += prob_skip_cost;
3409 } else if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
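// Weigh coding the residual (coefficient rate, current distortion) against
// skipping the block outright (skip-flag rate only, distortion equal to the
// total sse) and keep whichever gives the lower rd cost.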
3410 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
3411 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
3412 // Add in the cost of the no skip flag.
3413 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3415 // FIXME(rbultje) make this work for splitmv also
3416 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
3417 distortion2 = total_sse;
3418 assert(total_sse >= 0);
3419 rate2 -= (rate_y + rate_uv);
3424 } else if (mb_skip_allowed) {
3425 // Add in the cost of the no skip flag.
3426 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
3429 // Calculate the final RD estimate for this mode.
3430 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3433 if (ref_frame == INTRA_FRAME) {
3434 // Keep record of best intra rd
3435 if (this_rd < best_intra_rd) {
3436 best_intra_rd = this_rd;
3437 best_intra_mode = mbmi->mode;
3440 // Keep record of best inter rd with single reference
3441 if (!comp_pred && !mode_excluded && this_rd < best_inter_rd) {
3442 best_inter_rd = this_rd;
3443 best_inter_ref_frame = ref_frame;
3447 if (!disable_skip && ref_frame == INTRA_FRAME) {
3448 for (i = 0; i < REFERENCE_MODES; ++i)
3449 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
3450 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3451 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
3454 // Store the respective mode distortions for later use.
3455 if (mode_distortions[this_mode] == -1
3456 || distortion2 < mode_distortions[this_mode]) {
3457 mode_distortions[this_mode] = distortion2;
3460 // Did this mode help, i.e. is it the new best mode?
3461 if (this_rd < best_rd || x->skip) {
3462 int max_plane = MAX_MB_PLANE;
3463 if (!mode_excluded) {
3464 // Note index of best mode so far
3465 best_mode_index = mode_index;
3467 if (ref_frame == INTRA_FRAME) {
3468 /* required for left and above block mv */
3469 mbmi->mv[0].as_int = 0;
3473 *returnrate = rate2;
3474 *returndistortion = distortion2;
3476 best_mbmode = *mbmi;
3477 best_skip2 = this_skip2;
3478 if (!x->select_txfm_size)
3479 swap_block_ptr(x, ctx, max_plane);
3480 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
3481 sizeof(uint8_t) * ctx->num_4x4_blk);
3483 // TODO(debargha): enhance this test with a better distortion prediction
3484 // based on qp, activity mask and history
3485 if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3486 (mode_index > MIN_EARLY_TERM_INDEX)) {
3487 const int qstep = xd->plane[0].dequant[1];
3488 // TODO(debargha): Enhance this by specializing for each mode_index
3490 if (x->source_variance < UINT_MAX) {
3491 const int var_adjust = (x->source_variance < 16);
3492 scale -= var_adjust;
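// Candidate for early termination: an inter mode whose scaled distortion is
// already below qstep squared leaves a residual that is negligible relative
// to the quantizer, so later modes are unlikely to improve the rd cost.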
3494 if (ref_frame > INTRA_FRAME &&
3495 distortion2 * scale < qstep * qstep) {
3502 /* keep record of best compound/single-only prediction */
3503 if (!disable_skip && ref_frame != INTRA_FRAME) {
3504 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3506 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3507 single_rate = rate2 - compmode_cost;
3508 hybrid_rate = rate2;
3510 single_rate = rate2;
3511 hybrid_rate = rate2 + compmode_cost;
3514 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3515 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3518 if (single_rd < best_pred_rd[SINGLE_REFERENCE]) {
3519 best_pred_rd[SINGLE_REFERENCE] = single_rd;
3522 if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
3523 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3526 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3527 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3529 /* keep record of best filter type */
3530 if (!mode_excluded && cm->interp_filter != BILINEAR) {
3531 int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
3532 SWITCHABLE_FILTERS : cm->interp_filter];
3534 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3536 if (ref == INT64_MAX)
3538 else if (rd_opt->filter_cache[i] == INT64_MAX)
3539 // When early termination is triggered, the encoder does not have
3540 // access to the rate-distortion cost. It only knows that the cost
3541 // should be above the maximum valid value. Hence it takes the known
3542 // maximum plus an arbitrary constant as the rate-distortion cost.
3543 adj_rd = rd_opt->mask_filter - ref + 10;
3545 adj_rd = rd_opt->filter_cache[i] - ref;
3548 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
3553 /* keep record of best txfm size */
3554 if (bsize < BLOCK_32X32) {
3555 if (bsize < BLOCK_16X16)
3556 tx_cache[ALLOW_16X16] = tx_cache[ALLOW_8X8];
3558 tx_cache[ALLOW_32X32] = tx_cache[ALLOW_16X16];
3560 if (!mode_excluded && this_rd != INT64_MAX) {
3561 for (i = 0; i < TX_MODES && tx_cache[i] < INT64_MAX; i++) {
3562 int64_t adj_rd = INT64_MAX;
3563 adj_rd = this_rd + tx_cache[i] - tx_cache[cm->tx_mode];
3565 if (adj_rd < best_tx_rd[i])
3566 best_tx_rd[i] = adj_rd;
3573 if (x->skip && !comp_pred)
3577 if (best_mode_index < 0 || best_rd >= best_rd_so_far)
3580 // If we used an estimate for the uv intra rd in the loop above...
3581 if (cpi->sf.use_uv_intra_rd_estimate) {
3582 // Do Intra UV best rd mode selection if best mode choice above was intra.
3583 if (vp9_mode_order[best_mode_index].ref_frame[0] == INTRA_FRAME) {
3585 *mbmi = best_mbmode;
3586 uv_tx_size = get_uv_tx_size(mbmi);
3587 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3588 &rate_uv_tokenonly[uv_tx_size],
3589 &dist_uv[uv_tx_size],
3590 &skip_uv[uv_tx_size],
3591 bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3596 assert((cm->interp_filter == SWITCHABLE) ||
3597 (cm->interp_filter == best_mbmode.interp_filter) ||
3598 !is_inter_block(&best_mbmode));
3600 update_rd_thresh_fact(cpi, bsize, best_mode_index);
3603 *mbmi = best_mbmode;
3604 x->skip |= best_skip2;
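// Express the best single-reference, compound and hybrid prediction rd values
// (and, below, the per-filter and per-tx-mode values) as differences from the
// overall best rd; these diffs are passed to store_coding_context() for the
// caller to accumulate.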
3606 for (i = 0; i < REFERENCE_MODES; ++i) {
3607 if (best_pred_rd[i] == INT64_MAX)
3608 best_pred_diff[i] = INT_MIN;
3610 best_pred_diff[i] = best_rd - best_pred_rd[i];
3614 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3615 if (best_filter_rd[i] == INT64_MAX)
3616 best_filter_diff[i] = 0;
3618 best_filter_diff[i] = best_rd - best_filter_rd[i];
3620 if (cm->interp_filter == SWITCHABLE)
3621 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3622 for (i = 0; i < TX_MODES; i++) {
3623 if (best_tx_rd[i] == INT64_MAX)
3624 best_tx_diff[i] = 0;
3626 best_tx_diff[i] = best_rd - best_tx_rd[i];
3629 vp9_zero(best_filter_diff);
3630 vp9_zero(best_tx_diff);
3633 if (!x->in_active_map) {
3634 assert(mbmi->ref_frame[0] == LAST_FRAME);
3635 assert(mbmi->ref_frame[1] == NONE);
3636 assert(mbmi->mode == NEARESTMV ||
3637 mbmi->mode == NEARMV ||
3638 mbmi->mode == ZEROMV);
3639 assert(frame_mv[mbmi->mode][LAST_FRAME].as_int == 0);
3640 assert(mbmi->mode == mbmi->uv_mode);
3643 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3644 store_coding_context(x, ctx, best_mode_index,
3645 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
3646 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
3647 mbmi->ref_frame[1]][0],
3648 best_pred_diff, best_tx_diff, best_filter_diff);
3654 int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
3655 const TileInfo *const tile,
3656 int mi_row, int mi_col,
3658 int64_t *returndistortion,
3660 PICK_MODE_CONTEXT *ctx,
3661 int64_t best_rd_so_far) {
3662 VP9_COMMON *const cm = &cpi->common;
3663 RD_OPT *const rd_opt = &cpi->rd;
3664 MACROBLOCKD *const xd = &x->e_mbd;
3665 MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
3666 const struct segmentation *const seg = &cm->seg;
3667 MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3668 unsigned char segment_id = mbmi->segment_id;
3670 int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3671 struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3672 static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3674 int64_t best_rd = best_rd_so_far;
3675 int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise
3676 static const int64_t best_tx_diff[TX_MODES] = { 0 };
3677 int64_t best_pred_diff[REFERENCE_MODES];
3678 int64_t best_pred_rd[REFERENCE_MODES];
3679 int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3680 int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3681 MB_MODE_INFO best_mbmode = { 0 };
3682 int ref_index, best_ref_index = 0;
3683 unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3684 vp9_prob comp_mode_p;
3685 int64_t best_inter_rd = INT64_MAX;
3686 MV_REFERENCE_FRAME best_inter_ref_frame = LAST_FRAME;
3687 INTERP_FILTER tmp_best_filter = SWITCHABLE;
3688 int rate_uv_intra, rate_uv_tokenonly;
3691 PREDICTION_MODE mode_uv = DC_PRED;
3692 int intra_cost_penalty = 20 * vp9_dc_quant(cm->base_qindex, cm->y_dc_delta_q);
3693 int_mv seg_mvs[4][MAX_REF_FRAMES];
3694 b_mode_info best_bmodes[4];
3696 int ref_frame_mask = 0;
3697 int mode_skip_mask = 0;
3699 x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3700 vpx_memset(x->zcoeff_blk[TX_4X4], 0, 4);
3702 for (i = 0; i < 4; i++) {
3703 int j;
3704 for (j = 0; j < MAX_REF_FRAMES; j++)
3705 seg_mvs[i][j].as_int = INVALID_MV;
3708 estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3709 &comp_mode_p);
3711 for (i = 0; i < REFERENCE_MODES; ++i)
3712 best_pred_rd[i] = INT64_MAX;
3713 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3714 best_filter_rd[i] = INT64_MAX;
3715 rate_uv_intra = INT_MAX;
3717 *returnrate = INT_MAX;
3719 for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3720 if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3721 vp9_setup_buffer_inter(cpi, x, tile,
3722 ref_frame, bsize, mi_row, mi_col,
3723 frame_mv[NEARESTMV], frame_mv[NEARMV],
3724 yv12_mb);
3725 }
3726 frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3727 frame_mv[ZEROMV][ref_frame].as_int = 0;
3730 for (ref_frame = LAST_FRAME;
3731 ref_frame <= ALTREF_FRAME && cpi->sf.reference_masking; ++ref_frame) {
3733 for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3734 if ((x->pred_mv_sad[ref_frame] >> 1) > x->pred_mv_sad[i]) {
3735 ref_frame_mask |= (1 << ref_frame);
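// A reference is flagged in ref_frame_mask here when its prediction-MV SAD is
// more than twice that of some other reference ((sad >> 1) > sad[i]), i.e. it
// is clearly inferior for this block, so the search can skip it cheaply later.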
3741 for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3742 int mode_excluded = 0;
3743 int64_t this_rd = INT64_MAX;
3744 int disable_skip = 0;
3745 int compmode_cost = 0;
3746 int rate2 = 0, rate_y = 0, rate_uv = 0;
3747 int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3751 int64_t total_sse = INT_MAX;
3754 ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3755 second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3757 // Look at the reference frame of the best mode so far and set the
3758 // skip mask to look at a subset of the remaining modes.
3759 if (ref_index > 2 && cpi->sf.mode_skip_start < MAX_MODES) {
3760 if (ref_index == 3) {
3761 switch (vp9_ref_order[best_ref_index].ref_frame[0]) {
3762 case INTRA_FRAME:
3763 mode_skip_mask = 0;
3764 break;
3765 case LAST_FRAME:
3766 mode_skip_mask = 0x0010;
3767 break;
3768 case GOLDEN_FRAME:
3769 mode_skip_mask = 0x0008;
3770 break;
3771 case ALTREF_FRAME:
3772 mode_skip_mask = 0x0000;
3773 break;
3774 case NONE:
3775 case MAX_REF_FRAMES:
3776 assert(0 && "Invalid Reference frame");
3777 }
3778 }
3779 if (mode_skip_mask & (1 << ref_index))
3780 continue;
3781 }
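// Each set bit in mode_skip_mask disables the vp9_ref_order entry at that
// bit index for the rest of this loop, so the masks chosen above prune
// reference combinations that are unlikely to win once one of the first
// three single-reference candidates has taken the lead.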
3783 // Test best rd so far against threshold for trying this mode.
3784 if (rd_less_than_thresh(best_rd,
3785 rd_opt->threshes[segment_id][bsize][ref_index],
3786 rd_opt->thresh_freq_fact[bsize][ref_index]))
3789 if (ref_frame > INTRA_FRAME &&
3790 !(cpi->ref_frame_flags & flag_list[ref_frame])) {
3791 continue;
3792 }
3794 comp_pred = second_ref_frame > INTRA_FRAME;
3796 if (!(cpi->ref_frame_flags & flag_list[second_ref_frame]))
3798 // Do not allow compound prediction if the segment-level reference frame
3799 // feature is in use, since in that case there can only be one reference.
3800 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME))
3802 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3803 vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME)
3805 if ((cpi->sf.mode_search_skip_flags & FLAG_SKIP_COMP_REFMISMATCH) &&
3806 ref_frame != best_inter_ref_frame &&
3807 second_ref_frame != best_inter_ref_frame)
3811 // TODO(jingning, jkoleszar): scaling reference frame not supported for
3812 // SPLITMV.
3813 if (ref_frame > INTRA_FRAME &&
3814 vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
3817 if (second_ref_frame > INTRA_FRAME &&
3818 vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf))
3822 mode_excluded = mode_excluded ? mode_excluded
3823 : cm->reference_mode == SINGLE_REFERENCE;
3824 } else if (ref_frame != INTRA_FRAME) {
3825 mode_excluded = mode_excluded ? mode_excluded
3826 : cm->reference_mode == COMPOUND_REFERENCE;
3829 // If the segment reference frame feature is enabled, then do nothing
3830 // if the current ref frame is not allowed.
3831 if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3832 vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) !=
3835 // If the segment skip feature is enabled, then do nothing
3836 // if the current mode is not allowed.
3837 } else if (vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP) &&
3838 ref_frame != INTRA_FRAME) {
3840 // Disable this drop out case if the ref frame
3841 // segment level feature is enabled for this segment. This is to
3842 // prevent the possibility that we end up unable to pick any mode.
3843 } else if (!vp9_segfeature_active(seg, segment_id,
3844 SEG_LVL_REF_FRAME)) {
3845 // Only consider ZEROMV/ALTREF_FRAME for the alt-ref frame, unless
3846 // ARNR filtering is enabled, in which case we want an unfiltered
3847 // alternative. We allow NEAR/NEAREST as well because they may result
3848 // in zero-zero MVs but be cheaper.
3849 if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3853 mbmi->tx_size = TX_4X4;
3854 mbmi->uv_mode = DC_PRED;
3855 mbmi->ref_frame[0] = ref_frame;
3856 mbmi->ref_frame[1] = second_ref_frame;
3857 // Evaluate all sub-pel filters irrespective of whether we can use
3858 // them for this frame.
3859 mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
3860 : cm->interp_filter;
3862 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3864 // Select prediction reference frames.
3865 for (i = 0; i < MAX_MB_PLANE; i++) {
3866 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3868 xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3871 if (ref_frame == INTRA_FRAME) {
3873 if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y,
3874 &distortion_y, best_rd) >= best_rd)
3877 rate2 += intra_cost_penalty;
3878 distortion2 += distortion_y;
3880 if (rate_uv_intra == INT_MAX) {
3881 choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4,
3887 rate2 += rate_uv_intra;
3888 rate_uv = rate_uv_tokenonly;
3889 distortion2 += dist_uv;
3890 distortion_uv = dist_uv;
3891 mbmi->uv_mode = mode_uv;
3895 int64_t this_rd_thresh;
3896 int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3897 int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3898 int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3899 int tmp_best_skippable = 0;
3900 int switchable_filter_index;
3901 int_mv *second_ref = comp_pred ?
3902 &mbmi->ref_mvs[second_ref_frame][0] : NULL;
3903 b_mode_info tmp_best_bmodes[16];
3904 MB_MODE_INFO tmp_best_mbmode;
3905 BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3906 int pred_exists = 0;
3909 this_rd_thresh = (ref_frame == LAST_FRAME) ?
3910 rd_opt->threshes[segment_id][bsize][THR_LAST] :
3911 rd_opt->threshes[segment_id][bsize][THR_ALTR];
3912 this_rd_thresh = (ref_frame == GOLDEN_FRAME) ?
3913 rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh;
3914 rd_opt->mask_filter = 0;
3915 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3916 rd_opt->filter_cache[i] = INT64_MAX;
3918 if (cm->interp_filter != BILINEAR) {
3919 tmp_best_filter = EIGHTTAP;
3920 if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
3921 tmp_best_filter = EIGHTTAP;
3922 } else if (cpi->sf.adaptive_pred_interp_filter == 1 &&
3923 ctx->pred_interp_filter < SWITCHABLE) {
3924 tmp_best_filter = ctx->pred_interp_filter;
3925 } else if (cpi->sf.adaptive_pred_interp_filter == 2) {
3926 tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ?
3927 ctx->pred_interp_filter : 0;
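// Seeding of tmp_best_filter: flat sources default to EIGHTTAP, and the
// adaptive_pred_interp_filter speed feature reuses the filter predicted from
// the coding context (falling back to the first filter in the list) before
// the per-filter loop below refines the choice.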
3929 for (switchable_filter_index = 0;
3930 switchable_filter_index < SWITCHABLE_FILTERS;
3931 ++switchable_filter_index) {
3934 mbmi->interp_filter = switchable_filter_index;
3935 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
3936 &mbmi->ref_mvs[ref_frame][0],
3937 second_ref, best_yrd, &rate,
3938 &rate_y, &distortion,
3939 &skippable, &total_sse,
3940 (int) this_rd_thresh, seg_mvs,
3941 bsi, switchable_filter_index,
3944 if (tmp_rd == INT64_MAX)
3946 rs = vp9_get_switchable_rate(cpi);
3947 rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
3948 rd_opt->filter_cache[switchable_filter_index] = tmp_rd;
3949 rd_opt->filter_cache[SWITCHABLE_FILTERS] =
3950 MIN(rd_opt->filter_cache[SWITCHABLE_FILTERS],
3951 tmp_rd + rs_rd);
3952 if (cm->interp_filter == SWITCHABLE)
3953 tmp_rd += rs_rd;
3955 rd_opt->mask_filter = MAX(rd_opt->mask_filter, tmp_rd);
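// filter_cache[] keeps the best RD cost seen per interpolation filter, the
// extra SWITCHABLE_FILTERS entry keeps the best cost including the bits
// needed to signal a switchable filter, and mask_filter tracks the worst
// cost seen so far; all of them feed the best_filter_diff bookkeeping below.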
3957 newbest = (tmp_rd < tmp_best_rd);
3958 if (newbest) {
3959 tmp_best_filter = mbmi->interp_filter;
3960 tmp_best_rd = tmp_rd;
3961 }
3962 if ((newbest && cm->interp_filter == SWITCHABLE) ||
3963 (mbmi->interp_filter == cm->interp_filter &&
3964 cm->interp_filter != SWITCHABLE)) {
3965 tmp_best_rdu = tmp_rd;
3966 tmp_best_rate = rate;
3967 tmp_best_ratey = rate_y;
3968 tmp_best_distortion = distortion;
3969 tmp_best_sse = total_sse;
3970 tmp_best_skippable = skippable;
3971 tmp_best_mbmode = *mbmi;
3972 for (i = 0; i < 4; i++) {
3973 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
3974 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
3977 if (switchable_filter_index == 0 &&
3978 cpi->sf.use_rd_breakout &&
3979 best_rd < INT64_MAX) {
3980 if (tmp_best_rdu / 2 > best_rd) {
3981 // Skip searching the other filters if the cost of the first one is
3982 // already substantially (more than 2x) larger than the best so far.
3983 tmp_best_filter = mbmi->interp_filter;
3984 tmp_best_rdu = INT64_MAX;
3989 } // switchable_filter_index loop
3993 if (tmp_best_rdu == INT64_MAX && pred_exists)
3994 continue;
3996 mbmi->interp_filter = (cm->interp_filter == SWITCHABLE ?
3997 tmp_best_filter : cm->interp_filter);
3998 if (!pred_exists) {
3999 // Handles the special case when a filter that is not in the
4000 // switchable list (bilinear, 6-tap) is indicated at the frame level
4001 tmp_rd = rd_pick_best_sub8x8_mode(cpi, x, tile,
4002 &mbmi->ref_mvs[ref_frame][0],
4003 second_ref, best_yrd, &rate, &rate_y,
4004 &distortion, &skippable, &total_sse,
4005 (int) this_rd_thresh, seg_mvs, bsi, 0,
4007 if (tmp_rd == INT64_MAX)
4008 continue;
4009 } else {
4010 total_sse = tmp_best_sse;
4011 rate = tmp_best_rate;
4012 rate_y = tmp_best_ratey;
4013 distortion = tmp_best_distortion;
4014 skippable = tmp_best_skippable;
4015 *mbmi = tmp_best_mbmode;
4016 for (i = 0; i < 4; i++)
4017 xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4021 distortion2 += distortion;
4023 if (cm->interp_filter == SWITCHABLE)
4024 rate2 += vp9_get_switchable_rate(cpi);
4027 mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4028 : cm->reference_mode == COMPOUND_REFERENCE;
4030 compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4032 tmp_best_rdu = best_rd -
4033 MIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4034 RDCOST(x->rdmult, x->rddiv, 0, total_sse));
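// tmp_best_rdu is reused here as the RD budget left for the chroma search:
// best_rd minus the cheaper of the luma cost so far and the pure-skip cost.
// Purely as an illustration, with best_rd = 1000 and a luma-side cost of
// 800, the chroma search gets a budget of 200; super_block_uvrd signals
// failure through rate_uv == INT_MAX, which is checked just below.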
4036 if (tmp_best_rdu > 0) {
4037 // If even the 'Y' rd value of split is higher than best so far
4038 // then don't bother looking at UV.
4039 vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col,
4040 BLOCK_8X8);
4041 super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4042 &uv_sse, BLOCK_8X8, tmp_best_rdu);
4043 if (rate_uv == INT_MAX)
4044 continue;
4045 rate2 += rate_uv;
4046 distortion2 += distortion_uv;
4047 skippable = skippable && uv_skippable;
4048 total_sse += uv_sse;
4052 if (cm->reference_mode == REFERENCE_MODE_SELECT)
4053 rate2 += compmode_cost;
4055 // Estimate the reference frame signaling cost and add it
4056 // to the rolling cost variable.
4057 if (second_ref_frame > INTRA_FRAME) {
4058 rate2 += ref_costs_comp[ref_frame];
4059 } else {
4060 rate2 += ref_costs_single[ref_frame];
4063 if (!disable_skip) {
4064 // Test for the condition where skip block will be activated
4065 // because there are no non-zero coefficients, and make any
4066 // necessary adjustment for rate. Ignore if skip is coded at
4067 // segment level as the cost won't have been added in.
4068 // Is MB-level skip allowed (i.e. not coded at segment level)?
4069 const int mb_skip_allowed = !vp9_segfeature_active(seg, segment_id,
4070 SEG_LVL_SKIP);
4072 if (mb_skip_allowed && ref_frame != INTRA_FRAME && !xd->lossless) {
4073 if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) <
4074 RDCOST(x->rdmult, x->rddiv, 0, total_sse)) {
4075 // Add in the cost of the no skip flag.
4076 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
4077 } else {
4078 // FIXME(rbultje) make this work for splitmv also
4079 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
4080 distortion2 = total_sse;
4081 assert(total_sse >= 0);
4082 rate2 -= (rate_y + rate_uv);
4087 } else if (mb_skip_allowed) {
4088 // Add in the cost of the no skip flag.
4089 rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
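// In short: if coding the coefficients is cheaper in RD terms than signaling
// an empty block (distortion = total_sse), pay for a "not skipped" flag;
// otherwise pay for the skip flag, drop the residual rate and take the
// source SSE as the distortion. If skip is already coded at the segment
// level, neither flag cost is added here.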
4092 // Calculate the final RD estimate for this mode.
4093 this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4096 // Keep record of best inter rd with single reference
4097 if (is_inter_block(mbmi) &&
4098 !has_second_ref(mbmi) &&
4100 this_rd < best_inter_rd) {
4101 best_inter_rd = this_rd;
4102 best_inter_ref_frame = ref_frame;
4105 if (!disable_skip && ref_frame == INTRA_FRAME) {
4106 for (i = 0; i < REFERENCE_MODES; ++i)
4107 best_pred_rd[i] = MIN(best_pred_rd[i], this_rd);
4108 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4109 best_filter_rd[i] = MIN(best_filter_rd[i], this_rd);
4112 // Did this mode help, i.e. is it the new best mode so far?
4113 if (this_rd < best_rd || x->skip) {
4114 if (!mode_excluded) {
4115 int max_plane = MAX_MB_PLANE;
4116 // Note index of best mode so far
4117 best_ref_index = ref_index;
4119 if (ref_frame == INTRA_FRAME) {
4120 /* required for left and above block mv */
4121 mbmi->mv[0].as_int = 0;
4122 max_plane = 1;
4123 }
4125 *returnrate = rate2;
4126 *returndistortion = distortion2;
4127 best_rd = this_rd;
4128 best_yrd = best_rd -
4129 RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4130 best_mbmode = *mbmi;
4131 best_skip2 = this_skip2;
4132 if (!x->select_txfm_size)
4133 swap_block_ptr(x, ctx, max_plane);
4134 vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
4135 sizeof(uint8_t) * ctx->num_4x4_blk);
4137 for (i = 0; i < 4; i++)
4138 best_bmodes[i] = xd->mi[0]->bmi[i];
4140 // TODO(debargha): enhance this test with a better distortion prediction
4141 // based on qp, activity mask and history
4142 if ((cpi->sf.mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4143 (ref_index > MIN_EARLY_TERM_INDEX)) {
4144 const int qstep = xd->plane[0].dequant[1];
4145 // TODO(debargha): Enhance this by specializing for each mode_index
4147 if (x->source_variance < UINT_MAX) {
4148 const int var_adjust = (x->source_variance < 16);
4149 scale -= var_adjust;
4151 if (ref_frame > INTRA_FRAME &&
4152 distortion2 * scale < qstep * qstep) {
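// Early-termination heuristic: once a few reference combinations have been
// tried, an inter candidate whose distortion (scaled by a small factor that
// shrinks for low-variance sources) already falls below the square of the
// quantizer step is treated as good enough, and the remaining candidates
// are skipped.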
4159 /* keep record of best compound/single-only prediction */
4160 if (!disable_skip && ref_frame != INTRA_FRAME) {
4161 int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4163 if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4164 single_rate = rate2 - compmode_cost;
4165 hybrid_rate = rate2;
4166 } else {
4167 single_rate = rate2;
4168 hybrid_rate = rate2 + compmode_cost;
4171 single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4172 hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4174 if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) {
4175 best_pred_rd[SINGLE_REFERENCE] = single_rd;
4176 } else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) {
4177 best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4179 if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4180 best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
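// single_rd and hybrid_rd re-price the same candidate with and without the
// one-bit compound-mode signal, so best_pred_rd[] ends up with the best
// achievable cost under SINGLE_REFERENCE, COMPOUND_REFERENCE and
// REFERENCE_MODE_SELECT; the differences are exported below in
// best_pred_diff[] for the frame-level reference-mode decision.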
4183 /* keep record of best filter type */
4184 if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4185 cm->interp_filter != BILINEAR) {
4186 int64_t ref = rd_opt->filter_cache[cm->interp_filter == SWITCHABLE ?
4187 SWITCHABLE_FILTERS : cm->interp_filter];
4188 int64_t adj_rd;
4189 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4190 if (ref == INT64_MAX)
4191 adj_rd = 0;
4192 else if (rd_opt->filter_cache[i] == INT64_MAX)
4193 // When early termination is triggered, the encoder does not have
4194 // access to the rate-distortion cost. It only knows that the cost
4195 // should be above the maximum valid value, hence it takes the known
4196 // maximum plus an arbitrary constant as the rate-distortion cost.
4197 adj_rd = rd_opt->mask_filter - ref + 10;
4198 else
4199 adj_rd = rd_opt->filter_cache[i] - ref;
4201 adj_rd += this_rd;
4202 best_filter_rd[i] = MIN(best_filter_rd[i], adj_rd);
4209 if (x->skip && !comp_pred)
4210 break;
4213 if (best_rd >= best_rd_so_far)
4214 return INT64_MAX;
4216 // If we used an estimate for the uv intra rd in the loop above...
4217 if (cpi->sf.use_uv_intra_rd_estimate) {
4218 // Do Intra UV best rd mode selection if best mode choice above was intra.
4219 if (vp9_ref_order[best_ref_index].ref_frame[0] == INTRA_FRAME) {
4220 *mbmi = best_mbmode;
4221 rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra,
4229 if (best_rd == INT64_MAX) {
4230 *returnrate = INT_MAX;
4231 *returndistortion = INT64_MAX;
4232 return best_rd;
4235 assert((cm->interp_filter == SWITCHABLE) ||
4236 (cm->interp_filter == best_mbmode.interp_filter) ||
4237 !is_inter_block(&best_mbmode));
4239 update_rd_thresh_fact(cpi, bsize, best_ref_index);
4242 *mbmi = best_mbmode;
4243 x->skip |= best_skip2;
4244 if (!is_inter_block(&best_mbmode)) {
4245 for (i = 0; i < 4; i++)
4246 xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4248 for (i = 0; i < 4; ++i)
4249 vpx_memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4251 mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
4252 mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
4255 for (i = 0; i < REFERENCE_MODES; ++i) {
4256 if (best_pred_rd[i] == INT64_MAX)
4257 best_pred_diff[i] = INT_MIN;
4258 else
4259 best_pred_diff[i] = best_rd - best_pred_rd[i];
4263 for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4264 if (best_filter_rd[i] == INT64_MAX)
4265 best_filter_diff[i] = 0;
4266 else
4267 best_filter_diff[i] = best_rd - best_filter_rd[i];
4269 if (cm->interp_filter == SWITCHABLE)
4270 assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4272 vp9_zero(best_filter_diff);
4275 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
4276 store_coding_context(x, ctx, best_ref_index,
4277 &mbmi->ref_mvs[mbmi->ref_frame[0]][0],
4278 &mbmi->ref_mvs[mbmi->ref_frame[1] < 0 ? 0 :
4279 mbmi->ref_frame[1]][0],
4280 best_pred_diff, best_tx_diff, best_filter_diff);
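// The two functions below seed the per-mode pruning multipliers consulted
// (after per-block scaling elsewhere) by rd_less_than_thresh() in the
// searches above: a thresh_mult of 0 means the mode is always worth trying,
// larger values demand a proportionally better best-so-far before the mode
// is evaluated, and INT_MAX disables the mode outright, e.g. when its
// reference frame is unavailable for this frame.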
4285 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
4287 RD_OPT *const rd = &cpi->rd;
4289 // Set baseline threshold values
4290 for (i = 0; i < MAX_MODES; ++i)
4291 rd->thresh_mult[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
4293 rd->thresh_mult[THR_NEARESTMV] = 0;
4294 rd->thresh_mult[THR_NEARESTG] = 0;
4295 rd->thresh_mult[THR_NEARESTA] = 0;
4297 rd->thresh_mult[THR_DC] += 1000;
4299 rd->thresh_mult[THR_NEWMV] += 1000;
4300 rd->thresh_mult[THR_NEWA] += 1000;
4301 rd->thresh_mult[THR_NEWG] += 1000;
4303 rd->thresh_mult[THR_NEARMV] += 1000;
4304 rd->thresh_mult[THR_NEARA] += 1000;
4305 rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
4306 rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
4308 rd->thresh_mult[THR_TM] += 1000;
4310 rd->thresh_mult[THR_COMP_NEARLA] += 1500;
4311 rd->thresh_mult[THR_COMP_NEWLA] += 2000;
4312 rd->thresh_mult[THR_NEARG] += 1000;
4313 rd->thresh_mult[THR_COMP_NEARGA] += 1500;
4314 rd->thresh_mult[THR_COMP_NEWGA] += 2000;
4316 rd->thresh_mult[THR_ZEROMV] += 2000;
4317 rd->thresh_mult[THR_ZEROG] += 2000;
4318 rd->thresh_mult[THR_ZEROA] += 2000;
4319 rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
4320 rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
4322 rd->thresh_mult[THR_H_PRED] += 2000;
4323 rd->thresh_mult[THR_V_PRED] += 2000;
4324 rd->thresh_mult[THR_D45_PRED ] += 2500;
4325 rd->thresh_mult[THR_D135_PRED] += 2500;
4326 rd->thresh_mult[THR_D117_PRED] += 2500;
4327 rd->thresh_mult[THR_D153_PRED] += 2500;
4328 rd->thresh_mult[THR_D207_PRED] += 2500;
4329 rd->thresh_mult[THR_D63_PRED] += 2500;
4331 /* Disable per-reference-frame modes if the corresponding flags are not set. */
4332 if (!(cpi->ref_frame_flags & VP9_LAST_FLAG)) {
4333 rd->thresh_mult[THR_NEWMV ] = INT_MAX;
4334 rd->thresh_mult[THR_NEARESTMV] = INT_MAX;
4335 rd->thresh_mult[THR_ZEROMV ] = INT_MAX;
4336 rd->thresh_mult[THR_NEARMV ] = INT_MAX;
4338 if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
4339 rd->thresh_mult[THR_NEARESTG ] = INT_MAX;
4340 rd->thresh_mult[THR_ZEROG ] = INT_MAX;
4341 rd->thresh_mult[THR_NEARG ] = INT_MAX;
4342 rd->thresh_mult[THR_NEWG ] = INT_MAX;
4344 if (!(cpi->ref_frame_flags & VP9_ALT_FLAG)) {
4345 rd->thresh_mult[THR_NEARESTA ] = INT_MAX;
4346 rd->thresh_mult[THR_ZEROA ] = INT_MAX;
4347 rd->thresh_mult[THR_NEARA ] = INT_MAX;
4348 rd->thresh_mult[THR_NEWA ] = INT_MAX;
4351 if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
4352 (VP9_LAST_FLAG | VP9_ALT_FLAG)) {
4353 rd->thresh_mult[THR_COMP_ZEROLA ] = INT_MAX;
4354 rd->thresh_mult[THR_COMP_NEARESTLA] = INT_MAX;
4355 rd->thresh_mult[THR_COMP_NEARLA ] = INT_MAX;
4356 rd->thresh_mult[THR_COMP_NEWLA ] = INT_MAX;
4358 if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
4359 (VP9_GOLD_FLAG | VP9_ALT_FLAG)) {
4360 rd->thresh_mult[THR_COMP_ZEROGA ] = INT_MAX;
4361 rd->thresh_mult[THR_COMP_NEARESTGA] = INT_MAX;
4362 rd->thresh_mult[THR_COMP_NEARGA ] = INT_MAX;
4363 rd->thresh_mult[THR_COMP_NEWGA ] = INT_MAX;
4367 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
4368 const SPEED_FEATURES *const sf = &cpi->sf;
4369 RD_OPT *const rd = &cpi->rd;
4372 for (i = 0; i < MAX_REFS; ++i)
4373 rd->thresh_mult_sub8x8[i] = is_best_mode(cpi->oxcf.mode) ? -500 : 0;
4375 rd->thresh_mult_sub8x8[THR_LAST] += 2500;
4376 rd->thresh_mult_sub8x8[THR_GOLD] += 2500;
4377 rd->thresh_mult_sub8x8[THR_ALTR] += 2500;
4378 rd->thresh_mult_sub8x8[THR_INTRA] += 2500;
4379 rd->thresh_mult_sub8x8[THR_COMP_LA] += 4500;
4380 rd->thresh_mult_sub8x8[THR_COMP_GA] += 4500;
4382 // Check for masked out split cases.
4383 for (i = 0; i < MAX_REFS; i++)
4384 if (sf->disable_split_mask & (1 << i))
4385 rd->thresh_mult_sub8x8[i] = INT_MAX;
4387 // Disable the mode test if the corresponding reference frame flag is not set.
4388 if (!(cpi->ref_frame_flags & VP9_LAST_FLAG))
4389 rd->thresh_mult_sub8x8[THR_LAST] = INT_MAX;
4390 if (!(cpi->ref_frame_flags & VP9_GOLD_FLAG))
4391 rd->thresh_mult_sub8x8[THR_GOLD] = INT_MAX;
4392 if (!(cpi->ref_frame_flags & VP9_ALT_FLAG))
4393 rd->thresh_mult_sub8x8[THR_ALTR] = INT_MAX;
4394 if ((cpi->ref_frame_flags & (VP9_LAST_FLAG | VP9_ALT_FLAG)) !=
4395 (VP9_LAST_FLAG | VP9_ALT_FLAG))
4396 rd->thresh_mult_sub8x8[THR_COMP_LA] = INT_MAX;
4397 if ((cpi->ref_frame_flags & (VP9_GOLD_FLAG | VP9_ALT_FLAG)) !=
4398 (VP9_GOLD_FLAG | VP9_ALT_FLAG))
4399 rd->thresh_mult_sub8x8[THR_COMP_GA] = INT_MAX;