2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
12 #include "./vp9_rtcd.h"
13 #include "./vpx_config.h"
15 #include "vpx_mem/vpx_mem.h"
17 #include "vp9/common/vp9_idct.h"
18 #include "vp9/common/vp9_reconinter.h"
19 #include "vp9/common/vp9_reconintra.h"
20 #include "vp9/common/vp9_systemdependent.h"
22 #include "vp9/encoder/vp9_encodemb.h"
23 #include "vp9/encoder/vp9_quantize.h"
24 #include "vp9/encoder/vp9_rdopt.h"
25 #include "vp9/encoder/vp9_tokenize.h"
  /* Above/left entropy contexts per plane, in 4x4-column/row units, used by
   * the trellis optimizer (16 presumably covers the largest superblock —
   * TODO confirm against struct optimize_ctx's declaration). */
  ENTROPY_CONTEXT ta[MAX_MB_PLANE][16];
  ENTROPY_CONTEXT tl[MAX_MB_PLANE][16];
/* Argument bundle threaded through the per-transform-block encode callbacks. */
struct encode_b_args {
  /* Entropy-context scratch used by the trellis optimizer (may be NULL for
   * the standalone intra path — see vp9_encode_block_intra). */
  struct optimize_ctx *ctx;
/* C reference implementation: write the per-pixel prediction residual
 * (source minus prediction) for a rows x cols block.  Each buffer advances
 * by its own stride between rows. */
void vp9_subtract_block_c(int rows, int cols,
                          int16_t *diff_ptr, ptrdiff_t diff_stride,
                          const uint8_t *src_ptr, ptrdiff_t src_stride,
                          const uint8_t *pred_ptr, ptrdiff_t pred_stride) {
  int row, col;

  for (row = 0; row < rows; ++row) {
    for (col = 0; col < cols; ++col)
      diff_ptr[col] = (int16_t)(src_ptr[col] - pred_ptr[col]);

    src_ptr += src_stride;
    pred_ptr += pred_stride;
    diff_ptr += diff_stride;
  }
}
54 static void subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
55 struct macroblock_plane *const p = &x->plane[plane];
56 const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
57 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
58 const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
59 const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize];
61 vp9_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
62 pd->dst.buf, pd->dst.stride);
65 void vp9_subtract_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
66 subtract_plane(x, bsize, 0);
69 void vp9_subtract_sbuv(MACROBLOCK *x, BLOCK_SIZE bsize) {
72 for (i = 1; i < MAX_MB_PLANE; i++)
73 subtract_plane(x, bsize, i);
76 void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
77 vp9_subtract_sby(x, bsize);
78 vp9_subtract_sbuv(x, bsize);
/* Deterministic tie-breaker applied when two RDCOST values compare equal.
 * NOTE(review): DM and D are unused here; parameters are kept for call-site
 * symmetry with RDCOST — confirm this is intentional. */
#define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF)
typedef struct vp9_token_state vp9_token_state;

/* One candidate state per coefficient position in the trellis (see
 * optimize_b: rate, error, next index, token, and quantized value). */
struct vp9_token_state {
// TODO(jimbankoski): experiment to find optimal RD numbers.
/* Rate-distortion multiplier per PLANE_TYPE (indexed by `type` in
 * optimize_b). */
static const int plane_rd_mult[4] = {
/* Recompute rd_cost0/rd_cost1 from the current rate/error pairs; when the
 * two costs tie, substitute the RDTRUNC values so the comparison below is
 * deterministic. */
#define UPDATE_RD_COST()\
  rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);\
  rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);\
  if (rd_cost0 == rd_cost1) {\
    rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);\
    rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);\
// This function is a place holder for now but may ultimately need
// to scan previous tokens to work out the correct context.
/* Evaluate the entropy context at position idx as if the candidate `token`
 * were chosen there: temporarily write its energy class into token_cache,
 * query the neighbor-based context, then restore the cached value. */
static int trellis_get_coeff_context(const int16_t *scan,
                                     uint8_t *token_cache) {
  int bak = token_cache[scan[idx]], pt;
  token_cache[scan[idx]] = vp9_pt_energy_class[token];
  pt = get_coef_context(nb, token_cache, idx + 1);
  /* Undo the speculative write so the cache still reflects real choices. */
  token_cache[scan[idx]] = bak;
/* Trellis (Viterbi) coefficient optimizer.  For one already-quantized
 * transform block, walk the coefficients from eob back to the start,
 * evaluating for each one the "keep as quantized" and "round toward zero"
 * alternatives against the token-cost tables, then trace the cheapest path
 * forward, rewriting qcoeff/dqcoeff and the block's eob in place. */
static void optimize_b(MACROBLOCK *mb,
                       int plane, int block, BLOCK_SIZE plane_bsize,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
  MACROBLOCKD *const xd = &mb->e_mbd;
  struct macroblock_plane *p = &mb->plane[plane];
  struct macroblockd_plane *pd = &xd->plane[plane];
  const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi);
  /* Two candidate states per coefficient position, plus the sentinel. */
  vp9_token_state tokens[1025][2];
  unsigned best_index[1025][2];
  const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, block);
  int16_t *dqcoeff_ptr;
  int eob = p->eobs[block], final_eob, sz = 0;
  int64_t rdmult, rddiv, rd_cost0, rd_cost1;
  int rate0, rate1, error0, error1, t0, t1;
  PLANE_TYPE type = pd->plane_type;
  int err_mult = plane_rd_mult[type];
  /* Number of coefficients for this transform size (16, 64, 256 or 1024). */
  const int default_eob = 16 << (tx_size << 1);
  /* 32x32 dequantized values are stored doubled; `mul` compensates. */
  const int mul = 1 + (tx_size == TX_32X32);
  uint8_t token_cache[1024];
  const int16_t *dequant_ptr = pd->dequant;
  const uint8_t *const band_translate = get_band_translate(tx_size);
  const scan_order *so = get_scan(xd, tx_size, type, block);
  const int16_t *scan = so->scan;
  const int16_t *nb = so->neighbors;
  /* Plane type and plane index must agree (0 = luma, nonzero = chroma). */
  assert((!type && !plane) || (type && plane));
  dqcoeff_ptr = BLOCK_OFFSET(pd->dqcoeff, block);
  qcoeff_ptr = BLOCK_OFFSET(p->qcoeff, block);
  assert(eob <= default_eob);
  /* Now set up a Viterbi trellis to evaluate alternative roundings. */
  rdmult = mb->rdmult * err_mult;
  if (!is_inter_block(&mb->e_mbd.mi_8x8[0]->mbmi))
    rdmult = (rdmult * 9) >> 4;
  /* Initialize the sentinel node of the trellis. */
  tokens[eob][0].rate = 0;
  tokens[eob][0].error = 0;
  tokens[eob][0].next = default_eob;
  tokens[eob][0].token = EOB_TOKEN;
  tokens[eob][0].qc = 0;
  *(tokens[eob] + 1) = *(tokens[eob] + 0);
  /* Pre-fill the token cache with the energy class of each coefficient's
   * current token so context lookups see the unmodified block. */
  for (i = 0; i < eob; i++)
    token_cache[scan[i]] = vp9_pt_energy_class[vp9_dct_value_tokens_ptr[
        qcoeff_ptr[scan[i]]].token];
  /* Backward pass: build candidate states from eob down to i0. */
  for (i = eob; i-- > i0;) {
    int base_bits, d2, dx;
    /* Only add a trellis state for non-zero coefficients. */
      error0 = tokens[next][0].error;
      error1 = tokens[next][1].error;
      /* Evaluate the first possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      t0 = (vp9_dct_value_tokens_ptr + x)->token;
      /* Consider both possible successor states. */
      if (next < default_eob) {
        band = band_translate[i + 1];
        pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
        mb->token_costs[tx_size][type][ref][band][0][pt]
                       [tokens[next][0].token];
        mb->token_costs[tx_size][type][ref][band][0][pt]
                       [tokens[next][1].token];
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      dx = mul * (dqcoeff_ptr[rc] - coeff_ptr[rc]);
      tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][0].error = d2 + (best ? error1 : error0);
      tokens[i][0].next = next;
      tokens[i][0].token = t0;
      best_index[i][0] = best;
      /* Evaluate the second possibility for this state. */
      rate0 = tokens[next][0].rate;
      rate1 = tokens[next][1].rate;
      /* Shrinking the magnitude by one is only considered when the smaller
       * value still dequantizes to within one step of the original. */
      if ((abs(x)*dequant_ptr[rc != 0] > abs(coeff_ptr[rc]) * mul) &&
          (abs(x)*dequant_ptr[rc != 0] < abs(coeff_ptr[rc]) * mul +
              dequant_ptr[rc != 0]))
      /* Consider both possible successor states. */
        /* If we reduced this coefficient to zero, check to see if
         * we need to move the EOB back here.
        t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
        t0 = t1 = (vp9_dct_value_tokens_ptr + x)->token;
      if (next < default_eob) {
        band = band_translate[i + 1];
        if (t0 != EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache);
          rate0 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][0].token];
        if (t1 != EOB_TOKEN) {
          pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache);
          rate1 += mb->token_costs[tx_size][type][ref][band][!x][pt]
                                  [tokens[next][1].token];
      /* And pick the best. */
      best = rd_cost1 < rd_cost0;
      base_bits = *(vp9_dct_value_cost_ptr + x);
      /* (v + sz) ^ sz negates v when sz is the sign mask of the original. */
      dx -= (dequant_ptr[rc != 0] + sz) ^ sz;
      tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
      tokens[i][1].error = d2 + (best ? error1 : error0);
      tokens[i][1].next = next;
      tokens[i][1].token = best ? t1 : t0;
      best_index[i][1] = best;
      /* Finally, make this the new head of the trellis. */
      /* There's no choice to make for a zero coefficient, so we don't
       * add a new trellis node, but we do need to update the costs.
      band = band_translate[i + 1];
      t0 = tokens[next][0].token;
      t1 = tokens[next][1].token;
      /* Update the cost of each path if we're past the EOB token. */
      if (t0 != EOB_TOKEN) {
        tokens[next][0].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t0];
        tokens[next][0].token = ZERO_TOKEN;
      if (t1 != EOB_TOKEN) {
        tokens[next][1].rate +=
            mb->token_costs[tx_size][type][ref][band][1][0][t1];
        tokens[next][1].token = ZERO_TOKEN;
      best_index[i][0] = best_index[i][1] = 0;
      /* Don't update next, because we didn't add a new node. */
  /* Now pick the best path through the whole trellis. */
  band = band_translate[i + 1];
  pt = combine_entropy_contexts(*a, *l);
  rate0 = tokens[next][0].rate;
  rate1 = tokens[next][1].rate;
  error0 = tokens[next][0].error;
  error1 = tokens[next][1].error;
  t0 = tokens[next][0].token;
  t1 = tokens[next][1].token;
  rate0 += mb->token_costs[tx_size][type][ref][band][0][pt][t0];
  rate1 += mb->token_costs[tx_size][type][ref][band][0][pt][t1];
  best = rd_cost1 < rd_cost0;
  /* Forward pass: clear the block, then write back only the coefficients
   * on the winning path. */
  vpx_memset(qcoeff_ptr, 0, sizeof(*qcoeff_ptr) * (16 << (tx_size * 2)));
  vpx_memset(dqcoeff_ptr, 0, sizeof(*dqcoeff_ptr) * (16 << (tx_size * 2)));
  for (i = next; i < eob; i = next) {
    x = tokens[i][best].qc;
      dqcoeff_ptr[rc] = (x * dequant_ptr[rc != 0]) / mul;
    next = tokens[i][best].next;
    best = best_index[i][best];
  mb->plane[plane].eobs[block] = final_eob;
  /* Nonzero final eob marks the contexts as "has coefficients". */
  *a = *l = (final_eob > 0);
329 void vp9_optimize_b(int plane, int block, BLOCK_SIZE plane_bsize,
330 TX_SIZE tx_size, MACROBLOCK *mb, struct optimize_ctx *ctx) {
332 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y);
333 optimize_b(mb, plane, block, plane_bsize,
334 &ctx->ta[plane][x], &ctx->tl[plane][y], tx_size);
337 static void optimize_init_b(int plane, BLOCK_SIZE bsize,
338 struct encode_b_args *args) {
339 const MACROBLOCKD *xd = &args->x->e_mbd;
340 const struct macroblockd_plane* const pd = &xd->plane[plane];
341 const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
342 const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
343 const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
344 const MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
345 const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi) : mbmi->tx_size;
347 vp9_get_entropy_contexts(tx_size, args->ctx->ta[plane], args->ctx->tl[plane],
348 pd->above_context, pd->left_context,
349 num_4x4_w, num_4x4_h);
/* Forward-transform and quantize one block of residual for the given
 * transform size, writing coeff/qcoeff/dqcoeff and the block's eob.
 * 32x32 uses its own quantizer and an optional low-precision DCT; 4x4 goes
 * through the function pointer fwd_txm4x4 (so lossless WHT can be swapped
 * in).  The scan order used is always the default (DCT) order here. */
void vp9_xform_quant(MACROBLOCK *x, int plane, int block,
                     BLOCK_SIZE plane_bsize, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *scan_order;
  uint16_t *eob = &p->eobs[block];
  const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  /* Locate this block's residual within the plane's src_diff buffer. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
      scan_order = &vp9_default_scan_orders[TX_32X32];
      /* Rate-distortion-tuned (lower precision) 32x32 DCT when enabled. */
      if (x->use_lp32x32fdct)
        vp9_fdct32x32_rd(src_diff, coeff, diff_stride);
        vp9_fdct32x32(src_diff, coeff, diff_stride);
      vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                           p->quant, p->quant_shift, qcoeff, dqcoeff,
                           pd->dequant, p->zbin_extra, eob, scan_order->scan,
      scan_order = &vp9_default_scan_orders[TX_16X16];
      vp9_fdct16x16(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob,
                     scan_order->scan, scan_order->iscan);
      scan_order = &vp9_default_scan_orders[TX_8X8];
      vp9_fdct8x8(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob,
                     scan_order->scan, scan_order->iscan);
      scan_order = &vp9_default_scan_orders[TX_4X4];
      x->fwd_txm4x4(src_diff, coeff, diff_stride);
      vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
                     p->quant, p->quant_shift, qcoeff, dqcoeff,
                     pd->dequant, p->zbin_extra, eob,
                     scan_order->scan, scan_order->iscan);
/* Inter-path per-block encode callback: transform+quantize, optionally run
 * the trellis optimizer (updating the above/left contexts either way), then
 * reconstruct into the destination buffer with the inverse transform. */
static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
                         TX_SIZE tx_size, void *arg) {
  struct encode_b_args *const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx *const ctx = args->ctx;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
  // TODO(jingning): per transformed block zero forcing only enabled for
  // luma component. will integrate chroma components as well.
  /* A pre-marked all-zero luma block skips coding entirely: just clear the
   * entropy contexts for its position. */
  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
    ctx->ta[plane][i] = 0;
    ctx->tl[plane][j] = 0;
    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    vp9_optimize_b(plane, block, plane_bsize, tx_size, x, ctx);
    /* No trellis: contexts simply record whether coefficients survive. */
    ctx->ta[plane][i] = p->eobs[block] > 0;
    ctx->tl[plane][j] = p->eobs[block] > 0;
  if (x->skip_encode || p->eobs[block] == 0)
      vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      // this is like vp9_short_idct4x4 but has a special case around eob<=1
      // which is significant (not just an optimization) for the lossless
      xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
      assert(0 && "Invalid transform size");
467 static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize,
468 TX_SIZE tx_size, void *arg) {
469 struct encode_b_args *const args = arg;
470 MACROBLOCK *const x = args->x;
471 MACROBLOCKD *const xd = &x->e_mbd;
472 struct macroblock_plane *const p = &x->plane[plane];
473 struct macroblockd_plane *const pd = &xd->plane[plane];
474 int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
477 txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
478 dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i];
480 vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
482 if (p->eobs[block] == 0)
485 xd->itxm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
/* Encode the luma plane of a block: subtract the prediction, set up the
 * plane-0 entropy contexts, then run the pass1 (no-trellis) callback over
 * every luma transform block. */
void vp9_encode_sby(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
  vp9_subtract_sby(x, bsize);
  optimize_init_b(0, bsize, &arg);
  vp9_foreach_transformed_block_in_plane(xd, bsize, 0, encode_block_pass1,
/* Encode all planes of a block: subtract the prediction, prime the trellis
 * entropy contexts for every plane when optimization is enabled, then run
 * the full encode_block callback over every transform block. */
void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct optimize_ctx ctx;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  struct encode_b_args arg = {x, &ctx, &mbmi->skip};
  vp9_subtract_sb(x, bsize);
  /* Same gating condition encode_block uses before calling the trellis. */
  if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
    for (i = 0; i < MAX_MB_PLANE; ++i)
      optimize_init_b(i, bsize, &arg);
  vp9_foreach_transformed_block(xd, bsize, encode_block, &arg);
/* Intra-encode one transform block: run spatial prediction, subtract,
 * forward-transform, quantize, and (unless encoding is skipped) reconstruct
 * in place.  4x4/8x8/16x16 may use a mode-dependent hybrid transform
 * (tx_type); 32x32 is always DCT. */
static void encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize,
                               TX_SIZE tx_size, void *arg) {
  struct encode_b_args* const args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi_8x8[0]->mbmi;
  struct macroblock_plane *const p = &x->plane[plane];
  struct macroblockd_plane *const pd = &xd->plane[plane];
  int16_t *coeff = BLOCK_OFFSET(p->coeff, block);
  int16_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  int16_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
  const scan_order *scan_order;
  MB_PREDICTION_MODE mode;
  const int bwl = b_width_log2(plane_bsize);
  const int diff_stride = 4 * (1 << bwl);
  uint16_t *eob = &p->eobs[block];
  /* Locate this block (in 4x4 units) within the plane's buffers. */
  txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j);
  dst = &pd->dst.buf[4 * (j * pd->dst.stride + i)];
  src = &p->src.buf[4 * (j * p->src.stride + i)];
  src_diff = &p->src_diff[4 * (j * diff_stride + i)];
  // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx);
      scan_order = &vp9_default_scan_orders[TX_32X32];
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      /* When skip_encode is set, predict from source pixels (and source
       * stride) so the prediction matches what RD search used. */
      vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? p->src.stride : pd->dst.stride,
                              dst, pd->dst.stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(32, 32, src_diff, diff_stride,
                           src, p->src.stride, dst, pd->dst.stride);
        if (x->use_lp32x32fdct)
          vp9_fdct32x32_rd(src_diff, coeff, diff_stride);
          vp9_fdct32x32(src_diff, coeff, diff_stride);
        vp9_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round,
                             p->quant, p->quant_shift, qcoeff, dqcoeff,
                             pd->dequant, p->zbin_extra, eob, scan_order->scan,
      if (!x->skip_encode && *eob)
        vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, *eob);
      tx_type = get_tx_type_16x16(pd->plane_type, xd);
      scan_order = &vp9_scan_orders[TX_16X16][tx_type];
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? p->src.stride : pd->dst.stride,
                              dst, pd->dst.stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(16, 16, src_diff, diff_stride,
                           src, p->src.stride, dst, pd->dst.stride);
        vp9_fht16x16(src_diff, coeff, diff_stride, tx_type);
        vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
                       p->quant, p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
      if (!x->skip_encode && *eob)
        vp9_iht16x16_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
      tx_type = get_tx_type_8x8(pd->plane_type, xd);
      scan_order = &vp9_scan_orders[TX_8X8][tx_type];
      mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? p->src.stride : pd->dst.stride,
                              dst, pd->dst.stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(8, 8, src_diff, diff_stride,
                           src, p->src.stride, dst, pd->dst.stride);
        vp9_fht8x8(src_diff, coeff, diff_stride, tx_type);
        vp9_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
      if (!x->skip_encode && *eob)
        vp9_iht8x8_add(tx_type, dqcoeff, dst, pd->dst.stride, *eob);
      tx_type = get_tx_type_4x4(pd->plane_type, xd, block);
      scan_order = &vp9_scan_orders[TX_4X4][tx_type];
      /* Sub-8x8 luma blocks carry an individual intra mode per 4x4 unit. */
      if (mbmi->sb_type < BLOCK_8X8 && plane == 0)
        mode = xd->mi_8x8[0]->bmi[block].as_mode;
        mode = plane == 0 ? mbmi->mode : mbmi->uv_mode;
      vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode,
                              x->skip_encode ? src : dst,
                              x->skip_encode ? p->src.stride : pd->dst.stride,
                              dst, pd->dst.stride, i, j, plane);
      if (!x->skip_recode) {
        vp9_subtract_block(4, 4, src_diff, diff_stride,
                           src, p->src.stride, dst, pd->dst.stride);
        if (tx_type != DCT_DCT)
          vp9_fht4x4(src_diff, coeff, diff_stride, tx_type);
          x->fwd_txm4x4(src_diff, coeff, diff_stride);
        vp9_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
                       p->quant_shift, qcoeff, dqcoeff,
                       pd->dequant, p->zbin_extra, eob, scan_order->scan,
      if (!x->skip_encode && *eob) {
        if (tx_type == DCT_DCT)
          // this is like vp9_short_idct4x4 but has a special case around eob<=1
          // which is significant (not just an optimization) for the lossless
          xd->itxm_add(dqcoeff, dst, pd->dst.stride, *eob);
          vp9_iht4x4_16_add(dqcoeff, dst, pd->dst.stride, tx_type);
654 void vp9_encode_block_intra(MACROBLOCK *x, int plane, int block,
655 BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
656 unsigned char *skip) {
657 struct encode_b_args arg = {x, NULL, skip};
658 encode_block_intra(plane, block, plane_bsize, tx_size, &arg);
662 void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
663 const MACROBLOCKD *const xd = &x->e_mbd;
664 struct encode_b_args arg = {x, NULL, &xd->mi_8x8[0]->mbmi.skip};
666 vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block_intra,
670 int vp9_encode_intra(MACROBLOCK *x, int use_16x16_pred) {
671 MB_MODE_INFO * mbmi = &x->e_mbd.mi_8x8[0]->mbmi;
673 mbmi->mode = DC_PRED;
674 mbmi->ref_frame[0] = INTRA_FRAME;
675 mbmi->tx_size = use_16x16_pred ? (mbmi->sb_type >= BLOCK_16X16 ? TX_16X16
678 vp9_encode_intra_block_plane(x, mbmi->sb_type, 0);
679 return vp9_get_mb_ss(x->plane[0].src_diff);