From: John Koleszar Date: Fri, 3 May 2013 00:05:14 +0000 (-0700) Subject: Separate transform and quant from vp9_encode_sb X-Git-Tag: v1.3.0~1106^2~64^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4529c68b3bf90194446919324a5bf9be62105b21;p=platform%2Fupstream%2Flibvpx.git Separate transform and quant from vp9_encode_sb This allows removing a large number of transform size specific functions, as well as supporting 444/alpha by routing all code through the subsampling-aware path. Change-Id: Ieb085cebe9f37f24fc24de179898b22abfda08a4 --- diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index 458f135..01859df 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -19,153 +19,3 @@ void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, else xd->inv_txm4x4(dqcoeff, diff, pitch); } - -void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, int16_t *output_coeff, - int pitch) { - vp9_short_idct8x8(input_dqcoeff, output_coeff, pitch); -} - -void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, - int16_t *output_coeff, int pitch) { - vp9_short_idct16x16(input_dqcoeff, output_coeff, pitch); -} - -void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - const int stride = 32 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const int offset = x_idx * 32 + y_idx * 32 * stride; - - vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024), - xd->plane[0].diff + offset, stride * 2); - } -} - -void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - const int stride = 16 << bwl, bstride = 4 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_16x16(xd, - (y_idx * bstride + x_idx) * 4); - const int offset = x_idx * 16 + y_idx * 16 * stride; - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), - xd->plane[0].diff + offset, stride * 2); - } else { - vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), - xd->plane[0].diff + offset, stride, tx_type); - } - } -} - -void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - const int stride = 8 << bwl, bstride = 2 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2); - const int offset = x_idx * 8 + y_idx * 8 * stride; - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), - xd->plane[0].diff + offset, stride * 2); - } else { - vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), - xd->plane[0].diff + offset, stride, tx_type); - } - } -} - -void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - const int stride = 4 << bwl, bstride = 1 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * bstride + x_idx); - const int offset = x_idx * 4 + y_idx * 4 * stride; - - if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n], - BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), - xd->plane[0].diff + offset, stride * 2); - } else { - vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), - xd->plane[0].diff + offset, stride, tx_type); - } - } -} - -void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - assert(bsize == BLOCK_SIZE_SB64X64); - - vp9_short_idct32x32(xd->plane[1].dqcoeff, xd->plane[1].diff, 64); - vp9_short_idct32x32(xd->plane[2].dqcoeff, xd->plane[2].diff, 64); -} - -void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2; - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 16 << (bwl - 1); - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - const int off = x_idx * 16 + y_idx * stride * 16; - - vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256), - xd->plane[1].diff + off, stride * 2); - vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256), - xd->plane[2].diff + off, stride * 2); - } -} - -void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1; - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 8 << (bwl - 1); - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - const int off = x_idx * 8 + y_idx * stride * 8; - - vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), - xd->plane[1].diff + off, stride * 2); - vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), - xd->plane[2].diff + off, stride * 2); - } -} - -void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 4 << (bwl - 1); - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - const int off = x_idx * 4 + y_idx * stride * 4; - - vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n], - BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), - xd->plane[1].diff + off, stride * 2); - vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n], - BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), - xd->plane[2].diff + off, stride * 2); - } -} diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h index aeac9a0..2aeb584 100644 --- a/vp9/common/vp9_invtrans.h +++ b/vp9/common/vp9_invtrans.h @@ -18,20 +18,4 @@ void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int eob, int16_t *dqcoeff, int16_t *diff, int pitch); - -void vp9_inverse_transform_b_8x8(int16_t *input_dqcoeff, - int16_t *output_coeff, int pitch); - -void vp9_inverse_transform_b_16x16(int16_t *input_dqcoeff, - int16_t *output_coeff, int pitch); - -void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); -void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize); - #endif // VP9_COMMON_VP9_INVTRANS_H_ diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 83c1102..e6c24f0 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -164,12 +164,12 @@ struct macroblock { void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch); void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, int y_blocks); +#if !CONFIG_SB8X8 void (*quantize_b_4x4_pair)(MACROBLOCK *x, int b_idx1, int b_idx2, int y_blocks); - void (*quantize_b_16x16)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, - int y_blocks); void (*quantize_b_8x8)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type, int y_blocks); +#endif }; #endif // VP9_ENCODER_VP9_BLOCK_H_ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 9c07b9c..4117852 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2435,13 +2435,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_encode_intra4x4mby(x, bsize); vp9_build_intra_predictors_sbuv_s(&x->e_mbd, bsize); - vp9_subtract_sbuv(x, bsize); - vp9_transform_sbuv_4x4(x, bsize); - vp9_quantize_sbuv_4x4(x, bsize); - if (x->optimize) - vp9_optimize_sbuv(cm, x, bsize); - vp9_inverse_transform_sbuv_4x4(xd, bsize); - vp9_recon_sbuv(xd, bsize); + vp9_encode_sbuv(cm, x, bsize); if (output_enabled) sum_intra_stats(cpi, x); diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index c5f29fe..d5574db 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -104,63 +104,16 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) { void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; vp9_build_intra_predictors_sby_s(xd, BLOCK_SIZE_MB16X16); - vp9_subtract_sby(x, BLOCK_SIZE_MB16X16); - - switch (tx_size) { - case TX_16X16: - vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); - break; - case TX_8X8: - vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); - break; - default: - vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); - break; - } - - vp9_recon_sby(xd, BLOCK_SIZE_MB16X16); + vp9_encode_sby(cm, x, BLOCK_SIZE_MB16X16); } void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; - TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; vp9_build_intra_predictors_sbuv_s(xd, BLOCK_SIZE_MB16X16); - vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16); - - switch (tx_size) { - case TX_4X4: - vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); - break; - default: // 16x16 or 8x8 - vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); - if (x->optimize) - vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); - vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); - break; - } - - vp9_recon_sbuv(xd, BLOCK_SIZE_MB16X16); + vp9_encode_sbuv(cm, x, BLOCK_SIZE_MB16X16); } #if !CONFIG_SB8X8 diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 5f00b70..6e28f90 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -67,143 +67,6 @@ void vp9_subtract_sb(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { } -void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - const int stride = 32 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32, - x->plane[0].coeff + n * 1024, stride * 2); - } -} - -void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - const int stride = 16 << bwl, bstride = 4 << bwl; - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_16x16(xd, - (y_idx * bstride + x_idx) * 4); - - if (tx_type != DCT_DCT) { - vp9_short_fht16x16(x->plane[0].src_diff + - y_idx * stride * 16 + x_idx * 16, - x->plane[0].coeff + n * 256, stride, tx_type); - } else { - x->fwd_txm16x16(x->plane[0].src_diff + y_idx * stride * 16 + x_idx * 16, - x->plane[0].coeff + n * 256, stride * 2); - } - } -} - -void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - const int stride = 8 << bwl, bstride = 2 << bwl; - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2); - - if (tx_type != DCT_DCT) { - vp9_short_fht8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[0].coeff + n * 64, stride, tx_type); - } else { - x->fwd_txm8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[0].coeff + n * 64, stride * 2); - } - } -} - -void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - const int stride = 4 << bwl; - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - - if (tx_type != DCT_DCT) { - vp9_short_fht4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[0].coeff + n * 16, stride, tx_type); - } else { - x->fwd_txm4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[0].coeff + n * 16, stride * 2); - } - } -} - -void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - assert(bsize == BLOCK_SIZE_SB64X64); - vp9_clear_system_state(); - vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64); - vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64); -} - -void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2; - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 16 << (bwl - 1); - int n; - - vp9_clear_system_state(); - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - - x->fwd_txm16x16(x->plane[1].src_diff + y_idx * stride * 16 + x_idx * 16, - x->plane[1].coeff + n * 256, stride * 2); - x->fwd_txm16x16(x->plane[2].src_diff + y_idx * stride * 16 + x_idx * 16, - x->plane[2].coeff + n * 256, stride * 2); - } -} - -void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1; - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 8 << (bwl - 1); - int n; - - vp9_clear_system_state(); - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - - x->fwd_txm8x8(x->plane[1].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[1].coeff + n * 64, stride * 2); - x->fwd_txm8x8(x->plane[2].src_diff + y_idx * stride * 8 + x_idx * 8, - x->plane[2].coeff + n * 64, stride * 2); - } -} - -void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); - const int stride = 4 << (bwl - 1); - int n; - - vp9_clear_system_state(); - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - - x->fwd_txm4x4(x->plane[1].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[1].coeff + n * 16, stride * 2); - x->fwd_txm4x4(x->plane[2].src_diff + y_idx * stride * 4 + x_idx * 4, - x->plane[2].coeff + n * 16, stride * 2); - } -} - #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) #define RDTRUNC_8x8(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF ) typedef struct vp9_token_state vp9_token_state; @@ -561,7 +424,7 @@ struct encode_b_args { struct optimize_ctx *ctx; }; -static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, +static void xform_quant(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *arg) { struct encode_b_args* const args = arg; MACROBLOCK* const x = args->x; @@ -572,9 +435,6 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, int16_t* const src_diff = raster_block_offset_int16(xd, bsize, plane, raster_block, x->plane[plane].src_diff); - int16_t* const diff = raster_block_offset_int16(xd, bsize, plane, - raster_block, - xd->plane[plane].diff); TX_TYPE tx_type = DCT_DCT; switch (ss_txfrm_size / 2) { @@ -624,6 +484,23 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, } vp9_quantize(x, plane, block, 16 << ss_txfrm_size, tx_type); +} + +static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + struct encode_b_args* const args = arg; + MACROBLOCK* const x = args->x; + MACROBLOCKD* const xd = &x->e_mbd; + const int bw = 4 << (b_width_log2(bsize) - xd->plane[plane].subsampling_x); + const int raster_block = txfrm_block_to_raster_block(xd, bsize, plane, + block, ss_txfrm_size); + int16_t* const diff = raster_block_offset_int16(xd, bsize, plane, + raster_block, + xd->plane[plane].diff); + TX_TYPE tx_type = DCT_DCT; + + xform_quant(plane, block, bsize, ss_txfrm_size, arg); + if (x->optimize) vp9_optimize_b(plane, block, bsize, ss_txfrm_size, args->cm, x, args->ctx); @@ -633,6 +510,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, diff, bw * 2); break; case TX_16X16: + tx_type = plane == 0 ? get_tx_type_16x16(xd, raster_block) : DCT_DCT; if (tx_type == DCT_DCT) { vp9_short_idct16x16(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), diff, bw * 2); @@ -642,6 +520,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, } break; case TX_8X8: + tx_type = plane == 0 ? get_tx_type_8x8(xd, raster_block) : DCT_DCT; if (tx_type == DCT_DCT) { vp9_short_idct8x8(BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16), diff, bw * 2); @@ -651,6 +530,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, } break; case TX_4X4: + tx_type = plane == 0 ? get_tx_type_4x4(xd, raster_block) : DCT_DCT; if (tx_type == DCT_DCT) { // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless @@ -665,6 +545,60 @@ static void encode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, } } +void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct encode_b_args arg = {cm, x, NULL}; + + foreach_transformed_block_in_plane(xd, bsize, 0, +#if !CONFIG_SB8X8 + 0, +#endif + xform_quant, &arg); +} + +void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct encode_b_args arg = {cm, x, NULL}; + + foreach_transformed_block_uv(xd, bsize, xform_quant, &arg); +} + +void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + + vp9_subtract_sby(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); + + foreach_transformed_block_in_plane(xd, bsize, 0, +#if !CONFIG_SB8X8 + 0, +#endif + encode_block, &arg); + + vp9_recon_sby(xd, bsize); +} + +void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + MACROBLOCKD* const xd = &x->e_mbd; + struct optimize_ctx ctx; + struct encode_b_args arg = {cm, x, &ctx}; + + vp9_subtract_sbuv(x, bsize); + if (x->optimize) + vp9_optimize_init(xd, bsize, &ctx); + + foreach_transformed_block_uv(xd, bsize, encode_block, &arg); + + vp9_recon_sbuv(xd, bsize); +} + void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { MACROBLOCKD* const xd = &x->e_mbd; diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 8322479..afbe446 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -22,18 +22,6 @@ typedef struct { MV_REFERENCE_FRAME second_ref_frame; } MODE_DEFINITION; - -#if !CONFIG_SB8X8 -#endif -void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); - struct optimize_ctx { ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; @@ -49,6 +37,14 @@ void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); void vp9_encode_sb(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_encode_sby(VP9_COMMON *const cm, MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_encode_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); + +void vp9_xform_quant_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_xform_quant_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); void vp9_subtract_block(int rows, int cols, int16_t *diff_ptr, int diff_stride, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 738d6e6..300fa32 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -867,9 +867,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) { } cpi->mb.quantize_b_4x4 = vp9_regular_quantize_b_4x4; +#if !CONFIG_SB8X8 cpi->mb.quantize_b_4x4_pair = vp9_regular_quantize_b_4x4_pair; cpi->mb.quantize_b_8x8 = vp9_regular_quantize_b_8x8; - cpi->mb.quantize_b_16x16 = vp9_regular_quantize_b_16x16; +#endif vp9_init_quantizer(cpi); diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 2de01d0..e8dd0e9 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -133,6 +133,7 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, pt_scan, 1); } +#if !CONFIG_SB8X8 void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks) { MACROBLOCKD *const xd = &mb->e_mbd; @@ -154,131 +155,6 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, pt_scan, 1); } -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, - int y_blocks) { - MACROBLOCKD *const xd = &mb->e_mbd; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - const int *pt_scan = get_scan_16x16(tx_type); - - quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, - BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), - 256, mb->skip_block, - mb->plane[pb_idx.plane].zbin, - mb->plane[pb_idx.plane].round, - mb->plane[pb_idx.plane].quant, - mb->plane[pb_idx.plane].quant_shift, - BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), - BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), - xd->plane[pb_idx.plane].dequant, - mb->plane[pb_idx.plane].zbin_extra, - &xd->plane[pb_idx.plane].eobs[pb_idx.block], - pt_scan, 1); -} - -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) { - MACROBLOCKD *const xd = &mb->e_mbd; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, b_idx); - - quantize(mb->plane[pb_idx.plane].zrun_zbin_boost, - BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), - 1024, mb->skip_block, - mb->plane[pb_idx.plane].zbin, - mb->plane[pb_idx.plane].round, - mb->plane[pb_idx.plane].quant, - mb->plane[pb_idx.plane].quant_shift, - BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), - BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), - xd->plane[pb_idx.plane].dequant, - mb->plane[pb_idx.plane].zbin_extra, - &xd->plane[pb_idx.plane].eobs[pb_idx.block], - vp9_default_zig_zag1d_32x32, 2); -} - -void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bw = 1 << (b_width_log2(bsize) - 3); - const int bh = 1 << (b_height_log2(bsize) - 3); - int n; - - for (n = 0; n < bw * bh; n++) - vp9_regular_quantize_b_32x32(x, n * 64, bw * bh * 64); -} - -void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - const int bstride = 16 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, - 4 * x_idx + y_idx * bstride); - x->quantize_b_16x16(x, n * 16, tx_type, 16 * bw * bh); - } -} - -void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - const int bstride = 4 << bwl; - int n; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, - 2 * x_idx + y_idx * bstride); - x->quantize_b_8x8(x, n * 4, tx_type, 4 * bw * bh); - } -} - -void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < bw * bh; n++) { - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - x->quantize_b_4x4(x, n, tx_type, bw * bh); - } -} - -void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - assert(bsize == BLOCK_SIZE_SB64X64); - vp9_regular_quantize_b_32x32(x, 256, 256); - vp9_regular_quantize_b_32x32(x, 320, 256); -} - -void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2; - const int bhl = b_height_log2(bsize) - 2; - const int uoff = 16 << (bhl + bwl); - int i; - - for (i = uoff; i < ((uoff * 3) >> 1); i += 16) - x->quantize_b_16x16(x, i, DCT_DCT, uoff); -} - -void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1; - const int bhl = b_height_log2(bsize) - 1; - const int uoff = 4 << (bhl + bwl); - int i; - - for (i = uoff; i < ((uoff * 3) >> 1); i += 4) - x->quantize_b_8x8(x, i, DCT_DCT, uoff); -} - -void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize); - const int bhl = b_height_log2(bsize); - const int uoff = 1 << (bhl + bwl); - int i; - - for (i = uoff; i < ((uoff * 3) >> 1); i++) - x->quantize_b_4x4(x, i, DCT_DCT, uoff); -} - /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of * these two C functions if corresponding optimized routine is not available. * NEON optimized version implements currently the fast quantization for pair @@ -288,6 +164,7 @@ void vp9_regular_quantize_b_4x4_pair(MACROBLOCK *x, int b_idx1, int b_idx2, vp9_regular_quantize_b_4x4(x, b_idx1, DCT_DCT, y_blocks); vp9_regular_quantize_b_4x4(x, b_idx2, DCT_DCT, y_blocks); } +#endif static void invert_quant(int16_t *quant, uint8_t *shift, int d) { unsigned t; diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h index 718a127..2b1eeab 100644 --- a/vp9/encoder/vp9_quantize.h +++ b/vp9/encoder/vp9_quantize.h @@ -31,20 +31,6 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks); void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int y_blocks); -void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, - int y_blocks); -void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, - int y_blocks); - -void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); - struct VP9_COMP; extern void vp9_set_quantizer(struct VP9_COMP *cpi, int Q); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index cf4b1e8..eb0ff9e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -293,7 +293,7 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { } static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, - int ib, PLANE_TYPE type, + int plane, int block, PLANE_TYPE type, ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, TX_SIZE tx_size, @@ -304,10 +304,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int c = 0; int cost = 0, pad; const int *scan, *nb; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); - const int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block]; - const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, - pb_idx.block, 16); + const int eob = xd->plane[plane].eobs[block]; + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, + block, 16); const int ref = mbmi->ref_frame != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; @@ -334,7 +333,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, #endif // Check for consistency of tx_size with mode info - assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); + assert((!type && !plane) || (type && plane)); if (type == PLANE_TYPE_Y_WITH_DC) { assert(xd->mode_info_context->mbmi.txfm_size == tx_size); } else { @@ -345,7 +344,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, switch (tx_size) { case TX_4X4: { tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? - get_tx_type_4x4(xd, ib) : DCT_DCT; + get_tx_type_4x4(xd, block) : DCT_DCT; above_ec = A[0] != 0; left_ec = L[0] != 0; coef_probs = cm->fc.coef_probs_4x4; @@ -359,7 +358,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, case TX_8X8: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const int sz = 1 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; + const int x = block & ((1 << sz) - 1), y = block - x; TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_8x8(xd, y + (x >> 1)) : DCT_DCT; above_ec = (A[0] + A[1]) != 0; @@ -375,7 +374,7 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, case TX_16X16: { const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; const int sz = 2 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; + const int x = block & ((1 << sz) - 1), y = block - x; TX_TYPE tx_type = (type == PLANE_TYPE_Y_WITH_DC) ? get_tx_type_16x16(xd, y + (x >> 2)) : DCT_DCT; scan = get_scan_16x16(tx_type); @@ -615,9 +614,10 @@ static int block_error(int16_t *coeff, int16_t *dqcoeff, return error > INT_MAX ? INT_MAX : (int)error; } -static int block_error_sby(MACROBLOCK *x, int block_size, int shift) { +static int block_error_sby(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, - block_size, shift); + 16 << (bwl + bhl), shift); } static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { @@ -635,155 +635,54 @@ static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { return sum > INT_MAX ? INT_MAX : (int)sum; } -static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bw = 1 << bwl; - const int bh = 1 << b_height_log2(bsize); - int cost = 0, b; +static int rdcost_plane(VP9_COMMON *const cm, MACROBLOCK *x, + int plane, BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int bhl = b_height_log2(bsize) - xd->plane[plane].subsampling_y; + const int bw = 1 << bwl, bh = 1 << bhl; ENTROPY_CONTEXT t_above[16], t_left[16]; + int block, cost; - vpx_memcpy(&t_above, xd->plane[0].above_context, + vpx_memcpy(&t_above, xd->plane[plane].above_context, sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, + vpx_memcpy(&t_left, xd->plane[plane].left_context, sizeof(ENTROPY_CONTEXT) * bh); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx, t_left + y_idx, - TX_4X4, bw * bh); - } - - return cost; -} - -static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_transform_sby_4x4(x, bsize); - vp9_quantize_sby_4x4(x, bsize); - - *distortion = block_error_sby(x, 16 << (bwl + bhl), 2); - *rate = rdcost_sby_4x4(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize); -} - -static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT t_above[16], t_left[16]; - - vpx_memcpy(&t_above, xd->plane[0].above_context, - sizeof(ENTROPY_CONTEXT) * 2 * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, - sizeof(ENTROPY_CONTEXT) * 2 * bh); - - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b * 4, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx * 2, t_left + y_idx * 2, - TX_8X8, 4 * bw * bh); - } - - return cost; -} - -static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_sby_8x8(x, bsize); - vp9_quantize_sby_8x8(x, bsize); - - *distortion = block_error_sby(x, 16 << (bhl + bwl), 2); - *rate = rdcost_sby_8x8(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize); -} + cost = 0; + for (block = 0; block < bw * bh; block += 1 << (tx_size * 2)) { + int x_idx, y_idx; -static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - int cost = 0, b; - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT t_above[16], t_left[16]; - - vpx_memcpy(&t_above, xd->plane[0].above_context, - sizeof(ENTROPY_CONTEXT) * 4 * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, - sizeof(ENTROPY_CONTEXT) * 4 * bh); + txfrm_block_to_raster_xy(xd, bsize, plane, block, tx_size * 2, + &x_idx, &y_idx); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b * 16, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx * 4, t_left + y_idx * 4, - TX_16X16, bw * bh * 16); + cost += cost_coeffs(cm, x, plane, block, xd->plane[plane].plane_type, + t_above + x_idx, t_left + y_idx, + tx_size, bw * bh); } return cost; } -static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - MACROBLOCKD *const xd = &x->e_mbd; - - xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_sby_16x16(x, bsize); - vp9_quantize_sby_16x16(x, bsize); - - *distortion = block_error_sby(x, 16 << (bwl + bhl), 2); - *rate = rdcost_sby_16x16(cm, x, bsize); - *skippable = vp9_sby_is_skippable(xd, bsize); -} - -static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - int cost = 0, b; - MACROBLOCKD * const xd = &x->e_mbd; - ENTROPY_CONTEXT t_above[16], t_left[16]; - - vpx_memcpy(&t_above, xd->plane[0].above_context, - sizeof(ENTROPY_CONTEXT) * 8 * bw); - vpx_memcpy(&t_left, xd->plane[0].left_context, - sizeof(ENTROPY_CONTEXT) * 8 * bh); +static int rdcost_uv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { + int cost = 0, plane; - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, b * 64, PLANE_TYPE_Y_WITH_DC, - t_above + x_idx * 8, t_left + y_idx * 8, - TX_32X32, bw * bh * 64); + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + cost += rdcost_plane(cm, x, plane, bsize, tx_size); } - return cost; } -static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skippable, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); +static void super_block_yrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, int *skippable, + BLOCK_SIZE_TYPE bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + xd->mode_info_context->mbmi.txfm_size = tx_size; + vp9_xform_quant_sby(cm, x, bsize); - xd->mode_info_context->mbmi.txfm_size = TX_32X32; - vp9_transform_sby_32x32(x, bsize); - vp9_quantize_sby_32x32(x, bsize); - - *distortion = block_error_sby(x, 16 << (bwl + bhl), 0); - *rate = rdcost_sby_32x32(cm, x, bsize); + *distortion = block_error_sby(x, bsize, tx_size == TX_32X32 ? 0 : 2); + *rate = rdcost_plane(cm, x, 0, bsize, tx_size); *skippable = vp9_sby_is_skippable(xd, bsize); } @@ -797,13 +696,15 @@ static void super_block_yrd(VP9_COMP *cpi, vp9_subtract_sby(x, bs); if (bs >= BLOCK_SIZE_SB32X32) - super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], - bs); + super_block_yrd_for_txfm(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], + bs, TX_32X32); if (bs >= BLOCK_SIZE_MB16X16) - super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], - bs); - super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs); - super_block_yrd_4x4(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs); + super_block_yrd_for_txfm(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16], + bs, TX_16X16); + super_block_yrd_for_txfm(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8], bs, + TX_8X8); + super_block_yrd_for_txfm(cm, x, &r[TX_4X4][0], &d[TX_4X4], &s[TX_4X4], bs, + TX_4X4); choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, TX_32X32 - (bs < BLOCK_SIZE_SB32X32) @@ -920,7 +821,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, tempa = ta; templ = tl; - ratey = cost_coeffs(cm, x, ib, + ratey = cost_coeffs(cm, x, 0, ib, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4, 16); rate += ratey; distortion = vp9_block_error(coeff, @@ -1147,7 +1048,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, distortion = vp9_block_error_c(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); - rate_t = cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, + rate_t = cost_coeffs(cm, x, 0, idx, PLANE_TYPE_Y_WITH_DC, ta_temp, tl_temp, TX_8X8, 16); rate += rate_t; @@ -1182,12 +1083,12 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, distortion += vp9_block_error_c(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16), 16 << do_two); - rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(cm, x, 0, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, &ta_temp[i & 1], &tl_temp[i >> 1], TX_4X4, 16); if (do_two) { i++; - rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, + rate_t += cost_coeffs(cm, x, 0, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, &ta_temp[i & 1], &tl_temp[i >> 1], TX_4X4, 16); } @@ -1327,165 +1228,16 @@ static int64_t rd_pick_intra8x8mby_modes_and_txsz(VP9_COMP *cpi, MACROBLOCK *x, } #endif // !CONFIG_SB8X8 -static int rd_cost_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - int yoff = 4 * bw * bh; - int p, b, cost = 0; +static void super_block_uvrd_for_txfm(VP9_COMMON *const cm, MACROBLOCK *x, + int *rate, int *distortion, + int *skippable, BLOCK_SIZE_TYPE bsize, + TX_SIZE uv_tx_size) { MACROBLOCKD *const xd = &x->e_mbd; + vp9_xform_quant_sbuv(cm, x, bsize); - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 2 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 2 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b, PLANE_TYPE_UV, - t_above + x_idx, t_left + y_idx, - TX_4X4, bw * bh * 4); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} - -static void super_block_uvrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_4x4(x, bsize); - vp9_quantize_sbuv_4x4(x, bsize); - - *rate = rd_cost_sbuv_4x4(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 2); - *skip = vp9_sbuv_is_skippable(xd, bsize); -} - -static int rd_cost_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - int yoff = 16 * bw * bh; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 4 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 4 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b * 4, PLANE_TYPE_UV, - t_above + x_idx * 2, t_left + y_idx * 2, - TX_8X8, bw * bh * 16); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} - -static void super_block_uvrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_8x8(x, bsize); - vp9_quantize_sbuv_8x8(x, bsize); - - *rate = rd_cost_sbuv_8x8(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 2); - *skip = vp9_sbuv_is_skippable(xd, bsize); -} - -static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - int yoff = 64 * bw * bh; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 8 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 8 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b & (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b * 16, PLANE_TYPE_UV, - t_above + x_idx * 4, t_left + y_idx * 4, - TX_16X16, bw * bh * 64); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} - -static void super_block_uvrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_16x16(x, bsize); - vp9_quantize_sbuv_16x16(x, bsize); - - *rate = rd_cost_sbuv_16x16(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 2); - *skip = vp9_sbuv_is_skippable(xd, bsize); -} - -static int rd_cost_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - const int bwl = b_width_log2(bsize) - 4, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 4); - int yoff = 256 * bh * bw; - int p, b, cost = 0; - MACROBLOCKD *const xd = &x->e_mbd; - - for (p = 1; p < MAX_MB_PLANE; p++) { - ENTROPY_CONTEXT t_above[8], t_left[8]; - - vpx_memcpy(t_above, xd->plane[p].above_context, - sizeof(ENTROPY_CONTEXT) * 16 * bw >> xd->plane[p].subsampling_x); - vpx_memcpy(t_left, xd->plane[p].left_context, - sizeof(ENTROPY_CONTEXT) * 16 * bh >> xd->plane[p].subsampling_y); - for (b = 0; b < bw * bh; b++) { - const int x_idx = b * (bw - 1), y_idx = b >> bwl; - cost += cost_coeffs(cm, x, yoff + b * 64, PLANE_TYPE_UV, - t_above + x_idx * 8, t_left + y_idx * 8, - TX_32X32, 256 * bh * bw); - } - yoff = (yoff * 5) >> 2; // u -> v - } - - return cost; -} -#undef UVCTX - -static void super_block_uvrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - int *rate, int *distortion, int *skip, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - - vp9_transform_sbuv_32x32(x, bsize); - vp9_quantize_sbuv_32x32(x, bsize); - - *rate = rd_cost_sbuv_32x32(cm, x, bsize); - *distortion = block_error_sbuv(x, bsize, 0); - *skip = vp9_sbuv_is_skippable(xd, bsize); + *distortion = block_error_sbuv(x, bsize, uv_tx_size == TX_32X32 ? 0 : 2); + *rate = rdcost_uv(cm, x, bsize, uv_tx_size); + *skippable = vp9_sbuv_is_skippable(xd, bsize); } static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, @@ -1497,13 +1249,17 @@ static void super_block_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, vp9_subtract_sbuv(x, bsize); if (mbmi->txfm_size >= TX_32X32 && bsize >= BLOCK_SIZE_SB64X64) { - super_block_uvrd_32x32(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_32X32); } else if (mbmi->txfm_size >= TX_16X16 && bsize >= BLOCK_SIZE_SB32X32) { - super_block_uvrd_16x16(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_16X16); } else if (mbmi->txfm_size >= TX_8X8 && bsize >= BLOCK_SIZE_MB16X16) { - super_block_uvrd_8x8(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_8X8); } else { - super_block_uvrd_4x4(cm, x, rate, distortion, skippable, bsize); + super_block_uvrd_for_txfm(cm, x, rate, distortion, skippable, bsize, + TX_4X4); } } @@ -1740,7 +1496,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC, ta + (i & 1), tl + (i >> 1), TX_4X4, 16); } @@ -2250,7 +2006,7 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, thisdistortion = vp9_block_error(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(cm, x, 0, i, PLANE_TYPE_Y_WITH_DC, ta + (i & 3), tl + (i >> 2), TX_4X4, 16); } @@ -2333,7 +2089,7 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_8X8; - othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, + othercost += cost_coeffs(cm, x, 0, idx, PLANE_TYPE_Y_WITH_DC, tac + (i & 1) * 2, tlc + (i & 2), TX_8X8, 16); @@ -2352,12 +2108,12 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); *distortion += thisdistortion; *labelyrate += - cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, + cost_coeffs(cm, x, 0, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, ta + (i & 1) * 2, tl + (i & 2) + ((j & 2) >> 1), TX_4X4, 16); *labelyrate += - cost_coeffs(cm, x, ib + iblock[j] + 1, + cost_coeffs(cm, x, 0, ib + iblock[j] + 1, PLANE_TYPE_Y_WITH_DC, ta + (i & 1) * 2 + 1, tl + (i & 2) + ((j & 2) >> 1), @@ -2379,12 +2135,12 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_4X4; othercost += - cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, + cost_coeffs(cm, x, 0, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, tac + (i & 1) * 2, tlc + (i & 2) + ((j & 2) >> 1), TX_4X4, 16); othercost += - cost_coeffs(cm, x, ib + iblock[j] + 1, + cost_coeffs(cm, x, 0, ib + iblock[j] + 1, PLANE_TYPE_Y_WITH_DC, tac + (i & 1) * 2 + 1, tlc + (i & 2) + ((j & 2) >> 1), @@ -2397,7 +2153,7 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, thisdistortion = vp9_block_error_c(coeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); *distortion += thisdistortion; - *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, + *labelyrate += cost_coeffs(cm, x, 0, idx, PLANE_TYPE_Y_WITH_DC, ta + (i & 1) * 2, tl + (i & 2), TX_8X8, 16); @@ -4220,8 +3976,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, vp9_subtract_sbuv(x, BLOCK_SIZE_MB16X16); - super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv, - &uv_skippable, BLOCK_SIZE_MB16X16); + super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv, + &uv_skippable, BLOCK_SIZE_MB16X16, TX_4X4); rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; @@ -5240,8 +4996,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, bsize); vp9_subtract_sbuv(x, bsize); - super_block_uvrd_4x4(cm, x, &rate_uv, &distortion_uv, - &uv_skippable, bsize); + super_block_uvrd_for_txfm(cm, x, &rate_uv, &distortion_uv, + &uv_skippable, bsize, TX_4X4); rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable;