From 42d6be8080c8201f3a1844357c551cefed9d5f99 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Wed, 13 Feb 2013 12:28:19 -0800 Subject: [PATCH] Remove 2nd-order transform for first-order DC coefficients. Since addition of the larger-scale transforms (16x16, 32x32), these don't give a benefit at macroblock-sizes anymore. At superblock-sizes, 2nd-order transform was never used over the larger transforms. Future work should test whether there is a benefit for that use case. Change-Id: I90cadfc42befaf201de3eb0c4f7330c56e33330a --- vp9/common/vp9_blockd.h | 4 ++++ vp9/decoder/vp9_decodframe.c | 34 +++++++++++++++++++-------------- vp9/decoder/vp9_dequantize.h | 12 ++++++++++++ vp9/decoder/vp9_idct_blk.c | 45 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 14 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index b2c15fc..3351e69 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -618,6 +618,9 @@ static TX_TYPE get_tx_type(const MACROBLOCKD *xd, const BLOCKD *b) { } static int get_2nd_order_usage(const MACROBLOCKD *xd) { +#if 1 + return 0; +#else int has_2nd_order = (xd->mode_info_context->mbmi.mode != SPLITMV && xd->mode_info_context->mbmi.mode != I8X8_PRED && xd->mode_info_context->mbmi.mode != B_PRED && @@ -625,6 +628,7 @@ static int get_2nd_order_usage(const MACROBLOCKD *xd) { if (has_2nd_order) has_2nd_order = (get_tx_type(xd, xd->block) == DCT_DCT); return has_2nd_order; +#endif } extern void vp9_build_block_doffsets(MACROBLOCKD *xd); diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 3324186..9f4db6b 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -304,7 +304,8 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, 0, xd->eobs[idx]); } } - } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { + } else if (xd->mode_info_context->mbmi.mode == SPLITMV || + get_2nd_order_usage(xd) == 0) { 
assert(get_2nd_order_usage(xd) == 0); vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, xd->block[0].dequant, @@ -450,7 +451,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); - } else if (mode == SPLITMV) { + } else if (mode == SPLITMV || get_2nd_order_usage(xd) == 0) { assert(get_2nd_order_usage(xd) == 0); pbi->idct_add_y_block(xd->qcoeff, xd->block[0].dequant, @@ -595,13 +596,8 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 1) * 8, stride, stride, 0, b->eob); } - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); } - } else { + } else if (get_2nd_order_usage(xd) == 1) { vp9_dequantize_b_2x2(b); vp9_short_ihaar2x2(&b->dqcoeff[0], b->diff, 8); ((int *)b->qcoeff)[0] = 0; // 2nd order block are set to 0 after idct @@ -616,12 +612,17 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); - vp9_dequant_idct_add_uv_block_8x8_inplace_c( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, - xd->dst.uv_stride, xd->eobs + 16, xd); + } else { + vp9_dequant_idct_add_y_block_8x8_inplace_c( + xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->eobs, xd); } + vp9_dequant_idct_add_uv_block_8x8_inplace_c( + xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->dst.u_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.v_buffer + y_idx * 8 * xd->dst.uv_stride + x_idx * 8, + xd->dst.uv_stride, xd->eobs + 16, xd); }; static void 
decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, @@ -653,7 +654,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.y_stride, xd->dst.y_stride); } } - } else { + } else if (get_2nd_order_usage(xd) == 1) { vp9_dequantize_b(b); if (xd->eobs[24] > 1) { vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); @@ -673,6 +674,11 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->qcoeff, xd->block[0].dequant, xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, xd->dst.y_stride, xd->eobs, xd->block[24].diff, xd); + } else { + vp9_dequant_idct_add_y_block_4x4_inplace_c( + xd->qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16, + xd->dst.y_stride, xd->eobs, xd); } vp9_dequant_idct_add_uv_block_4x4_inplace_c( xd->qcoeff + 16 * 16, xd->block[16].dequant, diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 2a0ae80..2edbd6a 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -77,6 +77,12 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq const int16_t *dc, MACROBLOCKD *xd); +void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, + int stride, + uint16_t *eobs, + MACROBLOCKD *xd); + void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dst, int stride, @@ -84,6 +90,12 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq const int16_t *dc, MACROBLOCKD *xd); +void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, const int16_t *dq, + unsigned char *dst, + int stride, + uint16_t *eobs, + MACROBLOCKD *xd); + void vp9_dequant_idct_add_uv_block_8x8_inplace_c(int16_t *q, const int16_t *dq, unsigned char *dstu, unsigned char *dstv, diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 152527c..b350e4d 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ 
-64,6 +64,31 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, } } +void vp9_dequant_idct_add_y_block_4x4_inplace_c(int16_t *q, /* Visits a 4 by 4 grid of blocks (q advances 16 entries per block) and calls an idct-add helper on each, passing dst for both pointer arguments of the helper. */ + const int16_t *dq, + uint8_t *dst, + int stride, + uint16_t *eobs, + MACROBLOCKD *xd) { /* NOTE(review): xd is not referenced anywhere in this body. */ + int i, j; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + if (*eobs++ > 1) { /* One eobs entry is consumed per block; values above 1 go to the full dequant-idct-add helper. */ + vp9_dequant_idct_add_c(q, dq, dst, dst, stride, stride); + } else { /* eobs entry was 0 or 1: only the product q[0]*dq[0] is handed to the DC-only helper. */ + vp9_dc_only_idct_add_c(q[0]*dq[0], dst, dst, stride, stride); + ((int *)q)[0] = 0; /* Stores one int of zero at q, which covers more than q[0] wherever int is wider than int16_t; presumably clears the consumed coefficient - TODO confirm. NOTE(review): int store through an int16_t pointer. */ + } + + q += 16; + dst += 4; /* Next block: 4 bytes further along the same row. */ + } + + dst += 4 * stride - 16; /* Take back the 16 bytes advanced by the inner loop and move down 4 rows of stride bytes. */ + } +} + void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, @@ -221,6 +246,26 @@ void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(int16_t *q, xd->eobs[12]); } +void vp9_dequant_idct_add_y_block_8x8_inplace_c(int16_t *q, /* Issues four vp9_dequant_idct_add_8x8_c calls at dst offsets 0, 8, 8*stride and 8*stride plus 8, passing the same address for both pointer arguments of each call. */ + const int16_t *dq, + uint8_t *dst, + int stride, + uint16_t *eobs, /* NOTE(review): this parameter is not referenced below; the calls read xd->eobs instead. */ + MACROBLOCKD *xd) { + vp9_dequant_idct_add_8x8_c(q, dq, dst, dst, stride, stride, 0, xd->eobs[0]); /* Across the four calls q advances by 64 entries each time and the xd->eobs index steps by 4 (0, 4, 8, 12). */ + + vp9_dequant_idct_add_8x8_c(&q[64], dq, dst + 8, + dst + 8, stride, stride, 0, xd->eobs[4]); + + vp9_dequant_idct_add_8x8_c(&q[128], dq, dst + 8 * stride, + dst + 8 * stride, stride, stride, 0, + xd->eobs[8]); + + vp9_dequant_idct_add_8x8_c(&q[192], dq, dst + 8 * stride + 8, + dst + 8 * stride + 8, stride, stride, 0, + xd->eobs[12]); +} + void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, -- 2.7.4