From f7fa367094250ecaad8b6463ab877ca6e59ce62d Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 4 May 2013 15:49:41 -0700 Subject: [PATCH] Fix first-pass intra4x4 for sb8x8 experiment. Change-Id: I1df17f45721c690d157800daa6a0b377e3d32bc2 --- vp9/common/vp9_recon.c | 6 +++--- vp9/common/vp9_reconintra.c | 10 ++++++---- vp9/common/vp9_reconintra4x4.c | 15 +++++++++------ vp9/common/vp9_rtcd_defs.sh | 4 ++-- vp9/decoder/vp9_decodframe.c | 2 +- vp9/encoder/vp9_encodeintra.c | 23 ++++++++++------------- vp9/encoder/vp9_rdopt.c | 18 +++++++++++++++--- 7 files changed, 46 insertions(+), 32 deletions(-) diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c index 4ab4f39..6b102d1 100644 --- a/vp9/common/vp9_recon.c +++ b/vp9/common/vp9_recon.c @@ -28,10 +28,10 @@ static INLINE void recon(int rows, int cols, } -void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, - int stride) { +void vp9_recon_b_c(uint8_t *pred_ptr, int16_t *diff_ptr, int diff_stride, + uint8_t *dst_ptr, int stride) { assert(pred_ptr == dst_ptr); - recon(4, 4, diff_ptr, 16 >> CONFIG_SB8X8, dst_ptr, stride); + recon(4, 4, diff_ptr, diff_stride, dst_ptr, stride); } #if !CONFIG_SB8X8 diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index daeb6b5..aef34c9 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -592,14 +592,16 @@ void vp9_intra8x8_predict(MACROBLOCKD *xd, #if !CONFIG_NEWBINTRAMODES void vp9_intra4x4_predict(MACROBLOCKD *xd, int block_idx, + BLOCK_SIZE_TYPE bsize, int mode, uint8_t *predictor, int pre_stride) { + const int bwl = b_width_log2(bsize); + const int wmask = (1 << bwl) - 1; const int have_top = - (block_idx >> (2 >> CONFIG_SB8X8)) || xd->up_available; + (block_idx >> bwl) || xd->up_available; const int have_left = - (block_idx & (3 >> CONFIG_SB8X8)) || xd->left_available; - const int have_right = - ((block_idx & (3 >> CONFIG_SB8X8)) != (3 >> CONFIG_SB8X8)); + (block_idx & wmask) || xd->left_available; + const int have_right = ((block_idx & wmask) != wmask); vp9_build_intra_predictors(predictor, pre_stride, predictor, pre_stride, diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index 2a7c7f3..ce33aa5 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -160,13 +160,16 @@ B_PREDICTION_MODE vp9_find_bpred_context(MACROBLOCKD *xd, int block_idx, void vp9_intra4x4_predict(MACROBLOCKD *xd, int block_idx, + BLOCK_SIZE_TYPE bsize, int b_mode, uint8_t *predictor, int ps) { + const int bwl = b_width_log2(bsize); + const int wmask = (1 << bwl) - 1; int i, r, c; - const int have_top = (block_idx >> 2) || xd->up_available; - const int have_left = (block_idx & 3) || xd->left_available; - const int have_right = (block_idx & 3) != 3 || xd->right_available; + const int have_top = (block_idx >> bwl) || xd->up_available; + const int have_left = (block_idx & wmask) || xd->left_available; + const int have_right = (block_idx & wmask) != wmask || xd->right_available; uint8_t left[4], above[8], top_left; /* * 127 127 127 .. 127 127 127 127 127 127 @@ -197,8 +200,8 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, above[1] = above_ptr[1]; above[2] = above_ptr[2]; above[3] = above_ptr[3]; - if (((block_idx & 3) != 3) || - (have_right && block_idx == 3 && + if (((block_idx & wmask) != wmask) || + (have_right && block_idx == wmask && ((xd->mb_index != 3 && xd->sb_index != 3) || ((xd->mb_index & 1) == 0 && xd->sb_index == 3)))) { above[4] = above_ptr[4]; @@ -212,7 +215,7 @@ void vp9_intra4x4_predict(MACROBLOCKD *xd, above_right -= 32 * ps; if (xd->mb_index == 3) above_right -= 16 * ps; - above_right -= (block_idx & ~3) * ps; + above_right -= 4 * (block_idx >> bwl) * ps; /* use a more distant above-right (from closest available top-right * corner), but with a "localized DC" (similar'ish to TM-pred): diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index b1acc04..e473d81 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -60,7 +60,7 @@ vp9_copy_mem8x8_dspr2=vp9_copy_mem8x8_dspr2 prototype void vp9_copy_mem8x4 "const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch" specialize vp9_copy_mem8x4 mmx -prototype void vp9_recon_b "uint8_t *pred_ptr, int16_t *diff_ptr, uint8_t *dst_ptr, int stride" +prototype void vp9_recon_b "uint8_t *pred_ptr, int16_t *diff_ptr, int diff_stride, uint8_t *dst_ptr, int stride" specialize vp9_recon_b if [ "$CONFIG_SB8X8" != "yes" ]; then @@ -98,7 +98,7 @@ specialize vp9_build_intra_predictors_sby_s prototype void vp9_build_intra_predictors_sbuv_s "struct macroblockd *x, enum BLOCK_SIZE_TYPE bsize" specialize vp9_build_intra_predictors_sbuv_s -prototype void vp9_intra4x4_predict "struct macroblockd *xd, int block, int b_mode, uint8_t *predictor, int pre_stride" +prototype void vp9_intra4x4_predict "struct macroblockd *xd, int block, enum BLOCK_SIZE_TYPE bsize, int b_mode, uint8_t *predictor, int pre_stride" specialize vp9_intra4x4_predict; if [ "$CONFIG_SB8X8" != "yes" ]; then diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 0f87a21..2f713d3 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -354,7 +354,7 @@ static void decode_atom_intra(VP9D_COMP *pbi, MACROBLOCKD *xd, if (!xd->mode_info_context->mbmi.mb_skip_coeff) vp9_decode_coefs_4x4(pbi, xd, r, PLANE_TYPE_Y_WITH_DC, i); #endif - vp9_intra4x4_predict(xd, i, b_mode, dst, xd->plane[0].dst.stride); + vp9_intra4x4_predict(xd, i, bsize, b_mode, dst, xd->plane[0].dst.stride); // TODO(jingning): refactor to use foreach_transformed_block_in_plane_ tx_type = get_tx_type_4x4(xd, i); dequant_add_y(xd, tx_type, i, bsize); diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index d5574db..268058e 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -22,15 +22,12 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; (void) cpi; -#if !CONFIG_SB8X8 if (use_16x16_pred) { -#endif mbmi->mode = DC_PRED; mbmi->uv_mode = DC_PRED; mbmi->ref_frame = INTRA_FRAME; vp9_encode_intra16x16mby(&cpi->common, x); -#if !CONFIG_SB8X8 } else { int i; @@ -39,7 +36,6 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { encode_intra4x4block(x, i, BLOCK_SIZE_MB16X16); } } -#endif return vp9_get_mb_ss(x->plane[0].src_diff); } @@ -61,36 +57,37 @@ static void encode_intra4x4block(MACROBLOCK *x, int ib, raster_block_offset_int16(xd, bsize, 0, ib, xd->plane[0].diff); int16_t* const coeff = BLOCK_OFFSET(x->plane[0].coeff, ib, 16); + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - assert(ib < (16 >> (2 * CONFIG_SB8X8))); + assert(ib < (1 << (bwl + bhl))); #if CONFIG_NEWBINTRAMODES xd->mode_info_context->bmi[ib].as_mode.context = vp9_find_bpred_context(&x->e_mbd, ib, dst, xd->plane[0].dst.stride); #endif - vp9_intra4x4_predict(&x->e_mbd, ib, + vp9_intra4x4_predict(&x->e_mbd, ib, bsize, xd->mode_info_context->bmi[ib].as_mode.first, dst, xd->plane[0].dst.stride); - vp9_subtract_block(4, 4, src_diff, 16 >> CONFIG_SB8X8, + vp9_subtract_block(4, 4, src_diff, 4 << bwl, src, x->plane[0].src.stride, dst, xd->plane[0].dst.stride); tx_type = get_tx_type_4x4(&x->e_mbd, ib); if (tx_type != DCT_DCT) { - vp9_short_fht4x4(src_diff, coeff, 16 >> CONFIG_SB8X8, tx_type); + vp9_short_fht4x4(src_diff, coeff, 4 << bwl, tx_type); x->quantize_b_4x4(x, ib, tx_type, 16); vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - diff, 16 >> CONFIG_SB8X8, tx_type); + diff, 4 << bwl, tx_type); } else { - x->fwd_txm4x4(src_diff, coeff, 32 >> CONFIG_SB8X8); + x->fwd_txm4x4(src_diff, coeff, 8 << bwl); x->quantize_b_4x4(x, ib, tx_type, 16); vp9_inverse_transform_b_4x4(&x->e_mbd, xd->plane[0].eobs[ib], BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), - diff, 32 >> CONFIG_SB8X8); + diff, 8 << bwl); } - vp9_recon_b(dst, diff, dst, xd->plane[0].dst.stride); + vp9_recon_b(dst, diff, 4 << bwl, dst, xd->plane[0].dst.stride); } void vp9_encode_intra4x4mby(MACROBLOCK *mb, BLOCK_SIZE_TYPE bsize) { @@ -203,7 +200,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { raster_block_offset_uint8(xd, BLOCK_SIZE_MB16X16, 0, ib + iblock[i], xd->plane[0].dst.buf, xd->plane[0].dst.stride); - vp9_recon_b_c(dst, diff, dst, xd->plane[0].dst.stride); + vp9_recon_b_c(dst, diff, 16, dst, xd->plane[0].dst.stride); } } diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0600de2..f2cee7f 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -798,7 +798,13 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, rate = bmode_costs[mode]; #endif - vp9_intra4x4_predict(xd, ib, mode, dst, xd->plane[0].dst.stride); + vp9_intra4x4_predict(xd, ib, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + mode, dst, xd->plane[0].dst.stride); vp9_subtract_block(4, 4, src_diff, 16 >> CONFIG_SB8X8, src, src_stride, dst, xd->plane[0].dst.stride); @@ -846,9 +852,15 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, else xd->inv_txm4x4(best_dqcoeff, diff, 32 >> CONFIG_SB8X8); - vp9_intra4x4_predict(xd, ib, *best_mode, + vp9_intra4x4_predict(xd, ib, +#if CONFIG_SB8X8 + BLOCK_SIZE_SB8X8, +#else + BLOCK_SIZE_MB16X16, +#endif + *best_mode, dst, xd->plane[0].dst.stride); - vp9_recon_b(dst, diff, + vp9_recon_b(dst, diff, 16 >> CONFIG_SB8X8, dst, xd->plane[0].dst.stride); return best_rd; -- 2.7.4