From 4a88ad89fdda7b1b1f5f98c9791f135b9119ae5f Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Tue, 7 May 2013 15:36:30 -0700 Subject: [PATCH] Extend left/above partition context to per mi(8x8) Update and buffer left/above partition information context per 8x8 block. This makes it possible to further enable recursive partitioning down to the 4x4 block size, and hence to deprecate I4X4_PRED and SPLITMV. This commit also fixes a context buffer swap/restore issue in the 32x32 partition type search. This gives a 0.1% performance gain for derf/yt. Will refactor the superblock partition type search into recursive form. Change-Id: Ib61975aca5f12b78d8018481d7fa1393d085689b --- vp9/common/vp9_alloccommon.c | 12 ++++++------ vp9/common/vp9_blockd.h | 10 ++-------- vp9/common/vp9_enums.h | 2 ++ vp9/common/vp9_onyxc_int.h | 6 +++--- vp9/decoder/vp9_decodframe.c | 19 +++++++++---------- vp9/encoder/vp9_bitstream.c | 11 +++++------ vp9/encoder/vp9_encodeframe.c | 33 ++++++++++++++++++++++----------- 7 files changed, 49 insertions(+), 44 deletions(-) diff --git a/vp9/common/vp9_alloccommon.c b/vp9/common/vp9_alloccommon.c index 8179a69..fd39439 100644 --- a/vp9/common/vp9_alloccommon.c +++ b/vp9/common/vp9_alloccommon.c @@ -70,7 +70,7 @@ void vp9_free_frame_buffers(VP9_COMMON *oci) { } int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { - int i, mb_cols; + int i, mi_cols; // Our internal buffers are always multiples of 16 const int aligned_width = multiple16(width); @@ -140,19 +140,19 @@ int vp9_alloc_frame_buffers(VP9_COMMON *oci, int width, int height) { // FIXME(jkoleszar): allocate subsampled arrays for U/V once subsampling // information is exposed at this level - mb_cols = mb_cols_aligned_to_sb(oci); - oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 12 * mb_cols, 1); + mi_cols = mi_cols_aligned_to_sb(oci); + oci->above_context[0] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * 6 * mi_cols, 1); if (!oci->above_context[0]) { vp9_free_frame_buffers(oci); return 1; } 
oci->above_context[1] = - oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols; + oci->above_context[0] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols; oci->above_context[2] = - oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 4 * mb_cols; + oci->above_context[1] + sizeof(ENTROPY_CONTEXT) * 2 * mi_cols; oci->above_seg_context = - vpx_calloc(sizeof(PARTITION_CONTEXT) * mb_cols_aligned_to_sb(oci), 1); + vpx_calloc(sizeof(PARTITION_CONTEXT) * mi_cols, 1); if (!oci->above_seg_context) { vp9_free_frame_buffers(oci); diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index af8e662..e452684 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -413,7 +413,7 @@ typedef struct macroblockd { static INLINE void update_partition_context(MACROBLOCKD *xd, BLOCK_SIZE_TYPE sb_type, BLOCK_SIZE_TYPE sb_size) { - int bsl = mi_width_log2(sb_size), bs; + int bsl = mi_width_log2(sb_size), bs = 1 << bsl; int bwl = mi_width_log2(sb_type); int bhl = mi_height_log2(sb_type); int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; @@ -422,8 +422,6 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, if (bsl == 0) return; - bs = 1 << (bsl - 1); - // update the partition context at the end notes. set partition bits // of block sizes larger than the current one to be one, and partition // bits of smaller block sizes to be zero. 
@@ -454,18 +452,14 @@ static INLINE void update_partition_context(MACROBLOCKD *xd, static INLINE int partition_plane_context(MACROBLOCKD *xd, BLOCK_SIZE_TYPE sb_type) { - int bsl = mi_width_log2(sb_type), bs; + int bsl = mi_width_log2(sb_type), bs = 1 << bsl; int above = 0, left = 0, i; int boffset = mi_width_log2(BLOCK_SIZE_SB64X64) - bsl; - bs = 1 << (bsl - 1); - assert(mi_width_log2(sb_type) == mi_height_log2(sb_type)); assert(bsl >= 0); assert(boffset >= 0); - bs = 1 << (bsl - 1); - for (i = 0; i < bs; i++) above |= (xd->above_seg_context[i] & (1 << boffset)); for (i = 0; i < bs; i++) diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h index b00d892..1663195 100644 --- a/vp9/common/vp9_enums.h +++ b/vp9/common/vp9_enums.h @@ -18,6 +18,8 @@ #define MI_SIZE (1 << LOG2_MI_SIZE) #define MI_UV_SIZE (1 << (LOG2_MI_SIZE - 1)) +#define MI_MASK ((64 >> LOG2_MI_SIZE) - 1) + typedef enum BLOCK_SIZE_TYPE { BLOCK_SIZE_AB4X4, #if CONFIG_AB4X4 diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index d9d2989..de2cace 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -217,7 +217,7 @@ typedef struct VP9Common { // partition contexts PARTITION_CONTEXT *above_seg_context; - PARTITION_CONTEXT left_seg_context[4]; + PARTITION_CONTEXT left_seg_context[8]; /* keyframe block modes are predicted by their above, left neighbors */ @@ -297,8 +297,8 @@ static void ref_cnt_fb(int *buf, int *idx, int new_idx) { buf[new_idx]++; } -static int mb_cols_aligned_to_sb(VP9_COMMON *cm) { - return (cm->mb_cols + 3) & ~3; +static int mi_cols_aligned_to_sb(VP9_COMMON *cm) { + return 2 * ((cm->mb_cols + 3) & ~3); } static void set_mi_row_col(VP9_COMMON *cm, MACROBLOCKD *xd, diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 3007863..f9c2a51 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -372,8 +372,8 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE_TYPE bsize, xd->plane[i].left_context = 
cm->left_context[i] + (((mi_row * 2) & 15) >> xd->plane[i].subsampling_y); } - xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); - xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3); + xd->above_seg_context = cm->above_seg_context + mi_col; + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); // Distance of Mb to the various image edges. These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units @@ -443,9 +443,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, if (bsize > BLOCK_SIZE_SB8X8) { int pl; // read the partition information - xd->left_seg_context = - pc->left_seg_context + ((mi_row >> 1) & 3); - xd->above_seg_context = pc->above_seg_context + (mi_col >> 1); + xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = pc->above_seg_context + mi_col; pl = partition_plane_context(xd, bsize); partition = treed_read(r, vp9_partition_tree, pc->fc.partition_prob[pl]); @@ -486,8 +485,8 @@ static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16)) return; - xd->left_seg_context = pc->left_seg_context + ((mi_row >> 1) & 3); - xd->above_seg_context = pc->above_seg_context + (mi_col >> 1); + xd->left_seg_context = pc->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = pc->above_seg_context + mi_col; update_partition_context(xd, subsize, bsize); } @@ -849,11 +848,11 @@ static void decode_tiles(VP9D_COMP *pbi, // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
- vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 * - MAX_MB_PLANE * mb_cols_aligned_to_sb(pc)); + vpx_memset(pc->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 * + MAX_MB_PLANE * mi_cols_aligned_to_sb(pc)); vpx_memset(pc->above_seg_context, 0, sizeof(PARTITION_CONTEXT) * - mb_cols_aligned_to_sb(pc)); + mi_cols_aligned_to_sb(pc)); if (pbi->oxcf.inv_tile_order) { const int n_cols = pc->tile_columns; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index a7c26a4..050821d 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -878,9 +878,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, if (bsize > BLOCK_SIZE_SB8X8) { int pl; - xd->left_seg_context = - cm->left_seg_context + ((mi_row >> 1) & 3); - xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = cm->above_seg_context + mi_col; pl = partition_plane_context(xd, bsize); // encode the partition information write_token(bc, vp9_partition_tree, cm->fc.partition_prob[pl], @@ -918,8 +917,8 @@ static void write_modes_sb(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, if ((partition == PARTITION_SPLIT) && (bsize > BLOCK_SIZE_MB16X16)) return; - xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3); - xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); + xd->above_seg_context = cm->above_seg_context + mi_col; update_partition_context(xd, subsize, bsize); } @@ -932,7 +931,7 @@ static void write_modes(VP9_COMP *cpi, vp9_writer* const bc, m_ptr += c->cur_tile_mi_col_start + c->cur_tile_mi_row_start * mis; vpx_memset(c->above_seg_context, 0, sizeof(PARTITION_CONTEXT) * - mb_cols_aligned_to_sb(c)); + mi_cols_aligned_to_sb(c)); for (mi_row = c->cur_tile_mi_row_start; mi_row < c->cur_tile_mi_row_end; diff --git a/vp9/encoder/vp9_encodeframe.c 
b/vp9/encoder/vp9_encodeframe.c index 8ddad26..49e8cce 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -544,8 +544,8 @@ static INLINE void set_partition_seg_context(VP9_COMP *cpi, VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->mb.e_mbd; - xd->above_seg_context = cm->above_seg_context + (mi_col >> 1); - xd->left_seg_context = cm->left_seg_context + ((mi_row >> 1) & 3); + xd->above_seg_context = cm->above_seg_context + mi_col; + xd->left_seg_context = cm->left_seg_context + (mi_row & MI_MASK); } static void set_offsets(VP9_COMP *cpi, @@ -878,7 +878,7 @@ static void encode_sb_row(VP9_COMP *cpi, int sb64_rate = 0, sb64_dist = 0; int sb64_skip = 0; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; - PARTITION_CONTEXT seg_l[4], seg_a[4]; + PARTITION_CONTEXT seg_l[64 / MI_SIZE], seg_a[64 / MI_SIZE]; TOKENEXTRA *tp_orig = *tp; for (p = 0; p < MAX_MB_PLANE; p++) { @@ -888,9 +888,8 @@ static void encode_sb_row(VP9_COMP *cpi, memcpy(l + 16 * p, cm->left_context[p], sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); } - memcpy(&seg_a, cm->above_seg_context + (mi_col >> 1), - sizeof(seg_a)); - memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l)); + vpx_memcpy(&seg_a, cm->above_seg_context + mi_col, sizeof(seg_a)); + vpx_memcpy(&seg_l, cm->left_seg_context, sizeof(seg_l)); // FIXME(rbultje): this function should probably be rewritten to be // recursive at some point in the future. 
@@ -902,6 +901,7 @@ static void encode_sb_row(VP9_COMP *cpi, int sb32_skip = 0; int j; ENTROPY_CONTEXT l2[8 * MAX_MB_PLANE], a2[8 * MAX_MB_PLANE]; + PARTITION_CONTEXT sl32[32 / MI_SIZE], sa32[32 / MI_SIZE]; sb_partitioning[i] = BLOCK_SIZE_MB16X16; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) @@ -920,6 +920,8 @@ static void encode_sb_row(VP9_COMP *cpi, ((mi_col + x_idx) * 2 >> xd->plane[p].subsampling_x), sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); } + vpx_memcpy(&sa32, cm->above_seg_context + mi_col + x_idx, sizeof(sa32)); + vpx_memcpy(&sl32, cm->left_seg_context + y_idx, sizeof(sl32)); /* Encode MBs in raster order within the SB */ for (j = 0; j < 4; j++) { @@ -928,6 +930,7 @@ static void encode_sb_row(VP9_COMP *cpi, int r, d; int r2, d2, mb16_rate = 0, mb16_dist = 0, k; ENTROPY_CONTEXT l3[4 * MAX_MB_PLANE], a3[4 * MAX_MB_PLANE]; + PARTITION_CONTEXT sl16[16 / MI_SIZE], sa16[16 / MI_SIZE]; mb_partitioning[i][j] = BLOCK_SIZE_SB8X8; @@ -950,6 +953,9 @@ static void encode_sb_row(VP9_COMP *cpi, ((mi_col + x_idx_m) * 2 >> xd->plane[p].subsampling_x), sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); } + vpx_memcpy(&sa16, cm->above_seg_context + mi_col + x_idx_m, + sizeof(sa16)); + vpx_memcpy(&sl16, cm->left_seg_context + y_idx_m, sizeof(sl16)); for (k = 0; k < 4; k++) { xd->b_index = k; @@ -983,6 +989,9 @@ static void encode_sb_row(VP9_COMP *cpi, a3 + 4 * p, sizeof(ENTROPY_CONTEXT) * 4 >> xd->plane[p].subsampling_x); } + vpx_memcpy(cm->above_seg_context + mi_col + x_idx_m, + sa16, sizeof(sa16)); + vpx_memcpy(cm->left_seg_context + y_idx_m, sl16, sizeof(sl16)); // try 8x16 coding r2 = 0; @@ -1102,6 +1111,9 @@ static void encode_sb_row(VP9_COMP *cpi, a2 + 8 * p, sizeof(ENTROPY_CONTEXT) * 8 >> xd->plane[p].subsampling_x); } + // restore partition information context + vpx_memcpy(cm->above_seg_context + mi_col + x_idx, sa32, sizeof(sa32)); + vpx_memcpy(cm->left_seg_context + y_idx, sl32, sizeof(sl32)); 
set_partition_seg_context(cpi, mi_row + y_idx, mi_col + x_idx); pl = partition_plane_context(xd, BLOCK_SIZE_SB32X32); @@ -1258,8 +1270,7 @@ static void encode_sb_row(VP9_COMP *cpi, memcpy(cm->left_context[p], l + 16 * p, sizeof(ENTROPY_CONTEXT) * 16 >> xd->plane[p].subsampling_y); } - memcpy(cm->above_seg_context + (mi_col >> 1), &seg_a, - sizeof(seg_a)); + memcpy(cm->above_seg_context + mi_col, &seg_a, sizeof(seg_a)); memcpy(cm->left_seg_context, &seg_l, sizeof(seg_l)); set_partition_seg_context(cpi, mi_row, mi_col); @@ -1428,10 +1439,10 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 4 * - MAX_MB_PLANE * mb_cols_aligned_to_sb(cm)); + vpx_memset(cm->above_context[0], 0, sizeof(ENTROPY_CONTEXT) * 2 * + MAX_MB_PLANE * mi_cols_aligned_to_sb(cm)); vpx_memset(cm->above_seg_context, 0, sizeof(PARTITION_CONTEXT) * - mb_cols_aligned_to_sb(cm)); + mi_cols_aligned_to_sb(cm)); } static void switch_lossless_mode(VP9_COMP *cpi, int lossless) { -- 2.7.4