From 16f25f9dc83a37f2a16c7652445b6a6ecf8034af Mon Sep 17 00:00:00 2001 From: Yaowu Xu Date: Mon, 11 Feb 2013 15:58:22 -0800 Subject: [PATCH] fix the lossless experiment Change-Id: I95acfc1417634b52d344586ab97f0abaa9a4b256 --- vp9/common/vp9_blockd.h | 21 +++++++++++++ vp9/common/vp9_invtrans.c | 2 +- vp9/common/vp9_rtcd_defs.sh | 5 +++ vp9/decoder/vp9_decodframe.c | 73 +++++++++++++++++++++++-------------------- vp9/decoder/vp9_dequantize.h | 14 --------- vp9/decoder/vp9_idct_blk.c | 12 +++---- vp9/decoder/vp9_onyxd_int.h | 6 ---- vp9/encoder/vp9_encodeframe.c | 2 ++ vp9/encoder/vp9_rdopt.c | 2 +- 9 files changed, 75 insertions(+), 62 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 0d51f06..b0c1bfa 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -386,11 +386,28 @@ typedef struct macroblockd { unsigned int frames_since_golden; unsigned int frames_till_alt_ref_frame; +#if CONFIG_LOSSLESS + int lossless; +#endif /* Inverse transform function pointers. */ void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch); void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch); void (*inv_walsh4x4_1)(int16_t *in, int16_t *out); void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out); + void (*idct_add)(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *output, int pitch, int stride); + void (*dc_idct_add)(int16_t *input, const int16_t *dq, + uint8_t *pred, uint8_t *output, int pitch, int stride, int dc); + void (*dc_only_idct_add)(int input_dc, uint8_t *pred_ptr, + uint8_t *dst_ptr, int pitch, int stride); + void (*dc_idct_add_y_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs, + const int16_t *dc); + void (*idct_add_y_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs); + void (*idct_add_uv_block)(int16_t *q, const int16_t *dq, + uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride, + uint16_t *eobs); struct subpix_fn_table subpix; @@ -501,6 +518,10 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { int ib = (int)(b - xd->block); if (ib >= 16) return tx_type; +#if CONFIG_LOSSLESS + if (xd->lossless) + return tx_type; +#endif // TODO(rbultje, debargha): Explore ADST usage for superblocks if (xd->mode_info_context->mbmi.sb_type) return tx_type; diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index c81fe2d..241a5bc 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -44,7 +44,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { if (has_2nd_order) { /* do 2nd order transform on the dc block */ - vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff); + xd->inv_walsh4x4_lossless(blockd[24].dqcoeff, blockd[24].diff); recon_dcblock(xd); } diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 4dce0c9..02f8b66 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -324,10 +324,15 @@ specialize vp9_dc_only_idct_add if [ "$CONFIG_LOSSLESS" = "yes" ]; then prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_inv_walsh4x4_1_x8 prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch" +specialize vp9_short_inv_walsh4x4_x8 prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride" +specialize vp9_dc_only_inv_walsh_add prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, int16_t *out" +specialize vp9_short_inv_walsh4x4_1_lossless prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out" +specialize vp9_short_inv_walsh4x4_lossless fi prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad" diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 3324186..f103937 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -124,37 +124,42 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { } #if CONFIG_LOSSLESS + pbi->mb.lossless = 0; if (!QIndex) { pbi->mb.inv_xform4x4_1_x8 = vp9_short_inv_walsh4x4_1_x8; pbi->mb.inv_xform4x4_x8 = vp9_short_inv_walsh4x4_x8; pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1_lossless; pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless; - pbi->idct_add = vp9_dequant_idct_add_lossless_c; - pbi->dc_idct_add = vp9_dequant_dc_idct_add_lossless_c; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; + pbi->mb.idct_add = vp9_dequant_idct_add_lossless_c; + pbi->mb.dc_only_idct_add = vp9_dc_only_inv_walsh_add_c; + pbi->mb.dc_idct_add = vp9_dequant_dc_idct_add_lossless_c; + pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c; + pbi->mb.idct_add_y_block = vp9_dequant_idct_add_y_block_lossless_c; + pbi->mb.idct_add_uv_block = vp9_dequant_idct_add_uv_block_lossless_c; + pbi->mb.lossless = 1; } else { pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; + pbi->mb.idct_add = vp9_dequant_idct_add; + pbi->mb.dc_only_idct_add = vp9_dc_only_idct_add_c; + pbi->mb.dc_idct_add = vp9_dequant_dc_idct_add; + pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; + pbi->mb.idct_add_y_block = vp9_dequant_idct_add_y_block; + pbi->mb.idct_add_uv_block = vp9_dequant_idct_add_uv_block; } #else pbi->mb.inv_xform4x4_1_x8 = vp9_short_idct4x4llm_1; pbi->mb.inv_xform4x4_x8 = vp9_short_idct4x4llm; pbi->mb.inv_walsh4x4_1 = vp9_short_inv_walsh4x4_1; pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4; - pbi->idct_add = vp9_dequant_idct_add; - pbi->dc_idct_add = vp9_dequant_dc_idct_add; - pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; - pbi->idct_add_y_block = vp9_dequant_idct_add_y_block; - pbi->idct_add_uv_block = vp9_dequant_idct_add_uv_block; + pbi->mb.idct_add = vp9_dequant_idct_add; + pbi->mb.dc_only_idct_add = vp9_dc_only_idct_add_c; + pbi->mb.dc_idct_add = vp9_dequant_dc_idct_add; + pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block; + pbi->mb.idct_add_y_block = vp9_dequant_idct_add_y_block; + pbi->mb.idct_add_uv_block = vp9_dequant_idct_add_uv_block; #endif for (i = 16; i < 24; i++) { @@ -344,15 +349,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, int i8x8mode = b->bmi.as_mode.first; b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, + xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } else { @@ -399,17 +404,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - pbi->idct_add(b->qcoeff, b->dequant, b->predictor, + xd->idct_add(b->qcoeff, b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride); } } else if (mode == B_PRED) { @@ -433,8 +438,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } if (!xd->mode_info_context->mbmi.mb_skip_coeff) { @@ -443,7 +448,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context->y2 = 0; xd->left_context->y2 = 0; vp9_build_intra_predictors_mbuv(xd); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -452,13 +457,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs + 16); } else if (mode == SPLITMV) { assert(get_2nd_order_usage(xd) == 0); - pbi->idct_add_y_block(xd->qcoeff, + xd->idct_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -495,8 +500,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, *(b->base_dst) + b->dst, 16, b->dst_stride, b->eob); } else { - vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor, - *(b->base_dst) + b->dst, 16, b->dst_stride); + xd->idct_add(b->qcoeff, b->dequant, b->predictor, + *(b->base_dst) + b->dst, 16, b->dst_stride); } } } else { @@ -504,7 +509,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, assert(get_2nd_order_usage(xd) == 1); vp9_dequantize_b(b); if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); + xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; @@ -518,7 +523,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[0] = 0; } vp9_dequantize_b(b); - pbi->dc_idct_add_y_block(xd->qcoeff, + xd->dc_idct_add_y_block(xd->qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, @@ -526,7 +531,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->eobs, xd->block[24].diff); } - pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, + xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, xd->predictor + 16 * 16, xd->dst.u_buffer, @@ -644,7 +649,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, + x_idx * 16 + (i & 3) * 4, xd->dst.y_stride, xd->dst.y_stride, b->eob); } else { - vp9_dequant_idct_add_c( + xd->idct_add( b->qcoeff, b->dequant, xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride + x_idx * 16 + (i & 3) * 4, @@ -656,7 +661,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, } else { vp9_dequantize_b(b); if (xd->eobs[24] > 1) { - vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff); + xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff); ((int *)b->qcoeff)[0] = 0; ((int *)b->qcoeff)[1] = 0; ((int *)b->qcoeff)[2] = 0; diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 2a0ae80..0fa5144 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -42,20 +42,6 @@ extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t * uint16_t *eobs); #endif -typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride); -typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq, - unsigned char *pred, unsigned char *output, int pitch, int stride, int dc); - -typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs, - const int16_t *dc); -typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs); -typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq, - unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride, - uint16_t *eobs); - void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, uint16_t eobs); diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 152527c..ad93b49 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q, for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { if (*eobs++ > 1) - vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]); + xd->dc_idct_add(q, dq, dst, dst, stride, stride, dc[0]); else - vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride); + xd->dc_only_idct_add(dc[0], dst, dst, stride, stride); q += 16; dst += 4; @@ -143,9 +143,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride); + xd->idct_add(q, dq, dstu, dstu, stride, stride); } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride); + xd->dc_only_idct_add(q[0]*dq[0], dstu, dstu, stride, stride); ((int *)q)[0] = 0; } @@ -159,9 +159,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq, for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { - vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride); + xd->idct_add(q, dq, dstv, dstv, stride, stride); } else { - vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride); + xd->dc_only_idct_add(q[0]*dq[0], dstv, dstv, stride, stride); ((int *)q)[0] = 0; } diff --git a/vp9/decoder/vp9_onyxd_int.h b/vp9/decoder/vp9_onyxd_int.h index e04b9f5..0b0b903 100644 --- a/vp9/decoder/vp9_onyxd_int.h +++ b/vp9/decoder/vp9_onyxd_int.h @@ -70,12 +70,6 @@ typedef struct VP9Decompressor { DETOK detoken; - vp9_dequant_idct_add_fn_t idct_add; - vp9_dequant_dc_idct_add_fn_t dc_idct_add; - vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block; - vp9_dequant_idct_add_y_block_fn_t idct_add_y_block; - vp9_dequant_idct_add_uv_block_fn_t idct_add_uv_block; - int refresh_frame_flags; vp9_prob prob_skip_false; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 927a1b9..1b674f1 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1543,8 +1543,10 @@ void vp9_encode_frame(VP9_COMP *cpi) { /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */ #if CONFIG_LOSSLESS + cpi->mb.e_mbd.lossless = 0; if (cpi->oxcf.lossless) { txfm_type = ONLY_4X4; + cpi->mb.e_mbd.lossless = 1; } else #endif /* FIXME (rbultje) diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 317209b..762a929 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -698,7 +698,7 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion, // TODO(jingning) is it possible to quickly determine whether to force // trailing coefficients to be zero, instead of running trellis // optimization in the rate-distortion optimization loop? - if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) + if (mb->optimize && mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED) vp9_optimize_mby_16x16(mb); d = vp9_mbblock_error(mb, 0); -- 2.7.4