From e39ecfaa9843735df3a415cfb9ea6685ea4e1187 Mon Sep 17 00:00:00 2001
From: Deb Mukherjee
Date: Tue, 4 Mar 2014 11:15:35 -0800
Subject: [PATCH] Preliminary code for variance-based partitioning

Brings back most of Jim's previous patch for choosing partitioning
based on variance, while making it compatible with the current state
of the code. Also adds a nonrd_use_partition() function to
recursively encode arbitrary sb_type decisions within a 64x64 block,
and includes some refactoring.

Currently, when the VAR_BASED_PARTITION mode is turned on for
speed 7, a 10+% speed-up is observed. Experiments with and
improvements to this new partitioning method will follow.

Change-Id: Ie6f43bfbde30583e941f450bf07c3b48828c9571
---
 vp9/encoder/vp9_encodeframe.c | 782 +++++++++++++++++++++++++++++++++---------
 vp9/encoder/vp9_mcomp.c       |   2 +-
 vp9/encoder/vp9_onyx_if.c     |   2 +-
 vp9/encoder/vp9_pickmode.c    |   1 -
 4 files changed, 631 insertions(+), 156 deletions(-)

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 0a723d7..d0ada2f 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -150,6 +150,446 @@ static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi,
   return BLOCK_16X16;
 }
 
+// Lighter version of set_offsets that only sets the mode info
+// pointers.
+static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm,
+                                        MACROBLOCKD *const xd,
+                                        int mi_row,
+                                        int mi_col) {
+  const int idx_str = xd->mode_info_stride * mi_row + mi_col;
+  xd->mi_8x8 = cm->mi_grid_visible + idx_str;
+  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
+  // xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL;
+  xd->mi_8x8[0] = cm->mi + idx_str;
+}
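+
+// The active map is kept at macroblock (16x16) granularity while mi units
+// cover 8x8 pixels, hence the >> 1 conversions below (e.g. mi_col 6 maps
+// to mb_col 3).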
+
+static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col,
+                              BLOCK_SIZE bsize) {
+  VP9_COMMON *const cm = &cpi->common;
+  const int mb_rows = cm->mb_rows;
+  const int mb_cols = cm->mb_cols;
+  const int mb_row = mi_row >> 1;
+  const int mb_col = mi_col >> 1;
+  const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1;
+  const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1;
+  int r, c;
+  if (bsize <= BLOCK_16X16) {
+    return cpi->active_map[mb_row * mb_cols + mb_col];
+  }
+  for (r = 0; r < mb_height; ++r) {
+    for (c = 0; c < mb_width; ++c) {
+      int row = mb_row + r;
+      int col = mb_col + c;
+      if (row >= mb_rows || col >= mb_cols)
+        continue;
+      if (cpi->active_map[row * mb_cols + col])
+        return 1;
+    }
+  }
+  return 0;
+}
+
+static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
+                        int mi_row, int mi_col, BLOCK_SIZE bsize) {
+  MACROBLOCK *const x = &cpi->mb;
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi;
+  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+  const int mi_height = num_8x8_blocks_high_lookup[bsize];
+  const int mb_row = mi_row >> 1;
+  const int mb_col = mi_col >> 1;
+  const int idx_map = mb_row * cm->mb_cols + mb_col;
+  const struct segmentation *const seg = &cm->seg;
+
+  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
+
+  // Activity map pointer
+  x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
+
+  if (cpi->active_map_enabled && !x->e_mbd.lossless) {
+    x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
+  } else {
+    x->in_active_map = 1;
+  }
+
+  set_modeinfo_offsets(cm, xd, mi_row, mi_col);
+
+  mbmi = &xd->mi_8x8[0]->mbmi;
+
+  // Set up destination pointers.
+  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
+
+  // Set up limit values for MV components.
+  // MVs beyond this range do not produce new/different prediction blocks.
+  x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
+  x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
+  x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
+  x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
+
+  // Set up distance of MB to edge of frame in 1/8th pel units.
+  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
+  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
+                 cm->mi_rows, cm->mi_cols);
+
+  // Set up source buffers.
+  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
+  // R/D setup.
+  x->rddiv = cpi->RDDIV;
+  x->rdmult = cpi->RDMULT;
+
+  // Set up segment ID.
+  if (seg->enabled) {
+    if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
+      const uint8_t *const map = seg->update_map ? cpi->segmentation_map
+                                                 : cm->last_frame_seg_map;
+      mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
+    }
+    vp9_init_plane_quantizers(cpi, x);
+
+    if (seg->enabled && cpi->seg0_cnt > 0 &&
+        !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) &&
+        vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
+      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
+    } else {
+      const int y = mb_row & ~3;
+      const int x = mb_col & ~3;
+      const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
+      const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
+      const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1;
+      const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1;
+
+      cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
+                            << 16) / cm->MBs;
+    }
+
+    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
+  } else {
+    mbmi->segment_id = 0;
+    x->encode_breakout = cpi->encode_breakout;
+  }
+}
+
+// Points every mi-grid entry covered by the block at the mode info of its
+// top-left 8x8 unit, so later stages see one consistent sb_type.
+static void duplicate_modeinfo_in_sb(VP9_COMMON *const cm,
+                                     MACROBLOCKD *const xd,
+                                     int mi_row,
+                                     int mi_col,
+                                     BLOCK_SIZE bsize) {
+  const int block_width = num_8x8_blocks_wide_lookup[bsize];
+  const int block_height = num_8x8_blocks_high_lookup[bsize];
+  const int mis = xd->mode_info_stride;
+  int i, j;
+  for (j = 0; j < block_height; ++j)
+    for (i = 0; i < block_width; ++i) {
+      if (mi_row + j < cm->mi_rows && mi_col + i < cm->mi_cols)
+        xd->mi_8x8[j * mis + i] = xd->mi_8x8[0];
+    }
+}
+
+static void set_block_size(VP9_COMP *const cpi,
+                           const TileInfo *const tile,
+                           int mi_row, int mi_col,
+                           BLOCK_SIZE bsize) {
+  if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) {
+    MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+    set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col);
+    xd->mi_8x8[0]->mbmi.sb_type = bsize;
+    duplicate_modeinfo_in_sb(&cpi->common, xd, mi_row, mi_col, bsize);
+  }
+}
+
+typedef struct {
+  int64_t sum_square_error;
+  int64_t sum_error;
+  int count;
+  int variance;
+} var;
+
+typedef struct {
+  var none;
+  var horz[2];
+  var vert[2];
+} partition_variance;
+
+typedef struct {
+  partition_variance part_variances;
+  var split[4];
+} v8x8;
+
+typedef struct {
+  partition_variance part_variances;
+  v8x8 split[4];
+} v16x16;
+
+typedef struct {
+  partition_variance part_variances;
+  v16x16 split[4];
+} v32x32;
+
+typedef struct {
+  partition_variance part_variances;
+  v32x32 split[4];
+} v64x64;
+
+typedef struct {
+  partition_variance *part_variances;
+  var *split[4];
+} variance_node;
+
+typedef enum {
+  V16X16,
+  V32X32,
+  V64X64,
+} TREE_LEVEL;
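+
+// The variance tree mirrors the partition hierarchy: a v64x64 holds four
+// v32x32 children, each v32x32 holds four v16x16 children, and so on down
+// to the 8x8 leaves. tree_to_node() exposes any level through the
+// type-erased variance_node view so the same summing code can walk every
+// level of the tree.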
+static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
+  int i;
+  switch (bsize) {
+    case BLOCK_64X64: {
+      v64x64 *vt = (v64x64 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_32X32: {
+      v32x32 *vt = (v32x32 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_16X16: {
+      v16x16 *vt = (v16x16 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i].part_variances.none;
+      break;
+    }
+    case BLOCK_8X8: {
+      v8x8 *vt = (v8x8 *) data;
+      node->part_variances = &vt->part_variances;
+      for (i = 0; i < 4; i++)
+        node->split[i] = &vt->split[i];
+      break;
+    }
+    default: {
+      assert(0);
+    }
+  }
+}
+
+// Set variance values given sum square error, sum error, count.
+static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
+  v->sum_square_error = s2;
+  v->sum_error = s;
+  v->count = c;
+  if (c > 0)
+    v->variance = (int)(256 *
+                        (v->sum_square_error - v->sum_error * v->sum_error /
+                         v->count) / v->count);
+  else
+    v->variance = 0;
+}
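+// Example: for an 8x8 block, c = 64. With sum_error s = 128 and
+// sum_square_error s2 = 1024, the unnormalized term is
+// 1024 - 128 * 128 / 64 = 768, so v->variance = 256 * 768 / 64 = 3072.
+// The x256 scaling preserves precision through the integer divisions.
+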
+static void sum_2_variances(const var *a, const var *b, var *r) {
+  fill_variance(a->sum_square_error + b->sum_square_error,
+                a->sum_error + b->sum_error, a->count + b->count, r);
+}
+
+static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
+  variance_node node;
+  tree_to_node(data, bsize, &node);
+  sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]);
+  sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]);
+  sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]);
+  sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]);
+  sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1],
+                  &node.part_variances->none);
+}
+
+static int set_vt_partitioning(VP9_COMP *cpi,
+                               void *data,
+                               const TileInfo *const tile,
+                               BLOCK_SIZE bsize,
+                               int mi_row,
+                               int mi_col,
+                               int mi_size) {
+  VP9_COMMON *const cm = &cpi->common;
+  variance_node vt;
+  const int block_width = num_8x8_blocks_wide_lookup[bsize];
+  const int block_height = num_8x8_blocks_high_lookup[bsize];
+  // TODO(debargha): Choose this more intelligently.
+  const int64_t threshold_multiplier = 25;
+  int64_t threshold = threshold_multiplier * cpi->common.base_qindex;
+  assert(block_height == block_width);
+
+  tree_to_node(data, bsize, &vt);
+
+  // Split none is available only if more than half of the block, in both
+  // width and height, lies inside the visible image.
+  if (mi_col + block_width / 2 < cm->mi_cols &&
+      mi_row + block_height / 2 < cm->mi_rows &&
+      vt.part_variances->none.variance < threshold) {
+    set_block_size(cpi, tile, mi_row, mi_col, bsize);
+    return 1;
+  }
+
+  // Vertical split is available on all but the bottom border.
+  if (mi_row + block_height / 2 < cm->mi_rows &&
+      vt.part_variances->vert[0].variance < threshold &&
+      vt.part_variances->vert[1].variance < threshold) {
+    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
+    set_block_size(cpi, tile, mi_row, mi_col, subsize);
+    set_block_size(cpi, tile, mi_row, mi_col + block_width / 2, subsize);
+    return 1;
+  }
+
+  // Horizontal split is available on all but the right border.
+  if (mi_col + block_width / 2 < cm->mi_cols &&
+      vt.part_variances->horz[0].variance < threshold &&
+      vt.part_variances->horz[1].variance < threshold) {
+    BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
+    set_block_size(cpi, tile, mi_row, mi_col, subsize);
+    set_block_size(cpi, tile, mi_row + block_height / 2, mi_col, subsize);
+    return 1;
+  }
+  return 0;
+}
+
+// TODO(debargha): Fix this function and make it work as expected.
+static void choose_partitioning(VP9_COMP *cpi,
+                                const TileInfo *const tile,
+                                int mi_row, int mi_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *x = &cpi->mb;
+  MACROBLOCKD *xd = &cpi->mb.e_mbd;
+
+  int i, j, k;
+  v64x64 vt;
+  uint8_t *s;
+  const uint8_t *d;
+  int sp;
+  int dp;
+  int pixels_wide = 64, pixels_high = 64;
+  int_mv nearest_mv, near_mv;
+  const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+  const struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
+
+  vp9_zero(vt);
+  set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
+
+  if (xd->mb_to_right_edge < 0)
+    pixels_wide += (xd->mb_to_right_edge >> 3);
+  if (xd->mb_to_bottom_edge < 0)
+    pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+  s = x->plane[0].src.buf;
+  sp = x->plane[0].src.stride;
+
+  if (cm->frame_type != KEY_FRAME) {
+    vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, sf);
+
+    xd->mi_8x8[0]->mbmi.ref_frame[0] = LAST_FRAME;
+    xd->mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
+    vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv,
+                          xd->mi_8x8[0]->mbmi.ref_mvs[LAST_FRAME],
+                          &nearest_mv, &near_mv);
+
+    xd->mi_8x8[0]->mbmi.mv[0] = nearest_mv;
+    vp9_build_inter_predictors_sby(xd, mi_row, mi_col, BLOCK_64X64);
+
+    d = xd->plane[0].dst.buf;
+    dp = xd->plane[0].dst.stride;
+  } else {
+    d = VP9_VAR_OFFS;
+    dp = 0;
+  }
+
+  // Fill in the entire tree of 8x8 variances for splits.
+  for (i = 0; i < 4; i++) {
+    const int x32_idx = ((i & 1) << 5);
+    const int y32_idx = ((i >> 1) << 5);
+    for (j = 0; j < 4; j++) {
+      const int x16_idx = x32_idx + ((j & 1) << 4);
+      const int y16_idx = y32_idx + ((j >> 1) << 4);
+      v16x16 *vst = &vt.split[i].split[j];
+      for (k = 0; k < 4; k++) {
+        int x_idx = x16_idx + ((k & 1) << 3);
+        int y_idx = y16_idx + ((k >> 1) << 3);
+        unsigned int sse = 0;
+        int sum = 0;
+        if (x_idx < pixels_wide && y_idx < pixels_high)
+          vp9_get_sse_sum_8x8(s + y_idx * sp + x_idx, sp,
+                              d + y_idx * dp + x_idx, dp, &sse, &sum);
+        fill_variance(sse, sum, 64, &vst->split[k].part_variances.none);
+      }
+    }
+  }
+  // Fill the rest of the variance tree by summing split partition values.
+  for (i = 0; i < 4; i++) {
+    for (j = 0; j < 4; j++) {
+      fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
+    }
+    fill_variance_tree(&vt.split[i], BLOCK_32X32);
+  }
+  fill_variance_tree(&vt, BLOCK_64X64);
+
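+  // Note the change of units from here on: the SSE/sum accumulation above
+  // indexed pixels ((i & 1) << 5 is a 32-pixel step), while the descent
+  // below indexes the mi grid ((i & 1) << 2 is a 4-mi step, also 32 pixels,
+  // since each mi unit covers 8 pixels).
+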
+  // Now go through the entire structure, splitting every block size until
+  // we get to one that has a variance lower than our threshold, or we hit
+  // the 8x8 level.
+  if (!set_vt_partitioning(cpi, &vt, tile, BLOCK_64X64,
+                           mi_row, mi_col, 8)) {
+    for (i = 0; i < 4; ++i) {
+      const int x32_idx = ((i & 1) << 2);
+      const int y32_idx = ((i >> 1) << 2);
+      if (!set_vt_partitioning(cpi, &vt.split[i], tile, BLOCK_32X32,
+                               (mi_row + y32_idx), (mi_col + x32_idx), 4)) {
+        for (j = 0; j < 4; ++j) {
+          const int x16_idx = ((j & 1) << 1);
+          const int y16_idx = ((j >> 1) << 1);
+          // NOTE: This is a temporary hack to disable 8x8 partitions,
+          // since they perform very poorly - possibly due to a bug.
+#define DISABLE_8X8_VAR_BASED_PARTITION
+#ifdef DISABLE_8X8_VAR_BASED_PARTITION
+          if (mi_row + y32_idx + y16_idx + 1 < cm->mi_rows &&
+              mi_col + x32_idx + x16_idx + 1 < cm->mi_cols) {
+            set_block_size(cpi, tile,
+                           (mi_row + y32_idx + y16_idx),
+                           (mi_col + x32_idx + x16_idx),
+                           BLOCK_16X16);
+          } else {
+            for (k = 0; k < 4; ++k) {
+              const int x8_idx = (k & 1);
+              const int y8_idx = (k >> 1);
+              set_block_size(cpi, tile,
+                             (mi_row + y32_idx + y16_idx + y8_idx),
+                             (mi_col + x32_idx + x16_idx + x8_idx),
+                             BLOCK_8X8);
+            }
+          }
+#else
+          if (!set_vt_partitioning(cpi, &vt.split[i].split[j], tile,
+                                   BLOCK_16X16,
+                                   (mi_row + y32_idx + y16_idx),
+                                   (mi_col + x32_idx + x16_idx), 2)) {
+            for (k = 0; k < 4; ++k) {
+              const int x8_idx = (k & 1);
+              const int y8_idx = (k >> 1);
+              set_block_size(cpi, tile,
+                             (mi_row + y32_idx + y16_idx + y8_idx),
+                             (mi_col + x32_idx + x16_idx + x8_idx),
+                             BLOCK_8X8);
+            }
+          }
+#endif
+        }
+      }
+    }
+  }
+}
+
 // Original activity measure from Tim T's code.
 static unsigned int tt_activity_measure(MACROBLOCK *x) {
   unsigned int sse;
@@ -571,117 +1011,6 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                           x->e_mbd.plane[i].subsampling_y);
 }
 
-static int is_block_in_mb_map(VP9_COMP *cpi, int mi_row, int mi_col,
-                              BLOCK_SIZE bsize) {
-  VP9_COMMON *const cm = &cpi->common;
-  const int mb_rows = cm->mb_rows;
-  const int mb_cols = cm->mb_cols;
-  const int mb_row = mi_row >> 1;
-  const int mb_col = mi_col >> 1;
-  const int mb_width = num_8x8_blocks_wide_lookup[bsize] >> 1;
-  const int mb_height = num_8x8_blocks_high_lookup[bsize] >> 1;
-  int r, c;
-  if (bsize <= BLOCK_16X16) {
-    return cpi->active_map[mb_row * mb_cols + mb_col];
-  }
-  for (r = 0; r < mb_height; ++r) {
-    for (c = 0; c < mb_width; ++c) {
-      int row = mb_row + r;
-      int col = mb_col + c;
-      if (row >= mb_rows || col >= mb_cols)
-        continue;
-      if (cpi->active_map[row * mb_cols + col])
-        return 1;
-    }
-  }
-  return 0;
-}
-
-static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
-                        int mi_row, int mi_col, BLOCK_SIZE bsize) {
-  MACROBLOCK *const x = &cpi->mb;
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  MB_MODE_INFO *mbmi;
-  const int idx_str = xd->mode_info_stride * mi_row + mi_col;
-  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
-  const int mi_height = num_8x8_blocks_high_lookup[bsize];
-  const int mb_row = mi_row >> 1;
-  const int mb_col = mi_col >> 1;
-  const int idx_map = mb_row * cm->mb_cols + mb_col;
-  const struct segmentation *const seg = &cm->seg;
-
-  set_skip_context(xd, cpi->above_context, cpi->left_context, mi_row, mi_col);
-
-  // Activity map pointer
-  x->mb_activity_ptr = &cpi->mb_activity_map[idx_map];
-
-  if (cpi->active_map_enabled && !x->e_mbd.lossless) {
-    x->in_active_map = is_block_in_mb_map(cpi, mi_row, mi_col, bsize);
-  } else {
-    x->in_active_map = 1;
-  }
-
-  xd->mi_8x8 = cm->mi_grid_visible + idx_str;
-  xd->prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
-  xd->mi_8x8[0] = cm->mi + idx_str;
-
-  mbmi = &xd->mi_8x8[0]->mbmi;
-
-  // Set up destination pointers
-  vp9_setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col);
-
-  // Set up limit values for MV components
-  // mv beyond the range do not produce new/different prediction block
-  x->mv_row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND);
-  x->mv_col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND);
-  x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
-  x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;
-
-  // Set up distance of MB to edge of frame in 1/8th pel units
-  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
-  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width,
-                 cm->mi_rows, cm->mi_cols);
-
-  /* set up source buffers */
-  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
-
-  /* R/D setup */
-  x->rddiv = cpi->RDDIV;
-  x->rdmult = cpi->RDMULT;
-
-  /* segment ID */
-  if (seg->enabled) {
-    if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
-      const uint8_t *const map = seg->update_map ? cpi->segmentation_map
-                                                 : cm->last_frame_seg_map;
-      mbmi->segment_id = vp9_get_segment_id(cm, map, bsize, mi_row, mi_col);
-    }
-    vp9_init_plane_quantizers(cpi, x);
-
-    if (seg->enabled && cpi->seg0_cnt > 0 &&
-        !vp9_segfeature_active(seg, 0, SEG_LVL_REF_FRAME) &&
-        vp9_segfeature_active(seg, 1, SEG_LVL_REF_FRAME)) {
-      cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt;
-    } else {
-      const int y = mb_row & ~3;
-      const int x = mb_col & ~3;
-      const int p16 = ((mb_row & 1) << 1) + (mb_col & 1);
-      const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1);
-      const int tile_progress = tile->mi_col_start * cm->mb_rows >> 1;
-      const int mb_cols = (tile->mi_col_end - tile->mi_col_start) >> 1;
-
-      cpi->seg0_progress = ((y * mb_cols + x * 4 + p32 + p16 + tile_progress)
-                            << 16) / cm->MBs;
-    }
-
-    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
-  } else {
-    mbmi->segment_id = 0;
-    x->encode_breakout = cpi->encode_breakout;
-  }
-}
-
 static void rd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
                              int mi_row, int mi_col,
                              int *totalrate, int64_t *totaldist,
@@ -1032,9 +1361,9 @@ static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
 // However, at the bottom and right borders of the image the requested size
 // may not be allowed in which case this code attempts to choose the largest
 // allowable partition.
-static void set_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
-                             MODE_INFO **mi_8x8, int mi_row, int mi_col,
-                             BLOCK_SIZE bsize) {
+static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
+                                   MODE_INFO **mi_8x8, int mi_row, int mi_col,
+                                   BLOCK_SIZE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   const int mis = cm->mode_info_stride;
   int row8x8_remaining = tile->mi_row_end - mi_row;
@@ -2041,19 +2370,19 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     cpi->mb.source_variance = UINT_MAX;
     if (cpi->sf.partition_search_type == FIXED_PARTITION) {
       set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
-      set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
-                       cpi->sf.always_this_block_size);
+      set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col,
+                             cpi->sf.always_this_block_size);
       rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1);
-    } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
-               cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
-      // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
-      // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
-      // map to the same thing.
+    } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
       BLOCK_SIZE bsize;
       set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
       bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col);
-      set_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
+      set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
+      rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                       &dummy_rate, &dummy_dist, 1);
+    } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+      choose_partitioning(cpi, tile, mi_row, mi_col);
       rd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1);
     } else {
@@ -2330,14 +2659,40 @@ static void set_mode_info(MB_MODE_INFO *mbmi, BLOCK_SIZE bsize,
   mbmi->segment_id = 0;
 }
 
-static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
-                                TOKENEXTRA **tp, int mi_row, int mi_col,
-                                BLOCK_SIZE bsize, int *rate, int64_t *dist) {
+// Map a (32x32, 16x16, 8x8) sub-block index triple within a 64x64
+// superblock to the mi-grid row/column offset of that 8x8 block.
+static INLINE int get_block_row(int b32i, int b16i, int b8i) {
+  return ((b32i >> 1) << 2) + ((b16i >> 1) << 1) + (b8i >> 1);
+}
+
+static INLINE int get_block_col(int b32i, int b16i, int b8i) {
+  return ((b32i & 1) << 2) + ((b16i & 1) << 1) + (b8i & 1);
+}
+
+static void nonrd_pick_sb_modes(VP9_COMP *cpi, const TileInfo *const tile,
+                                int mi_row, int mi_col,
+                                int *rate, int64_t *dist,
+                                BLOCK_SIZE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  set_offsets(cpi, tile, mi_row, mi_col, bsize);
+  xd->mi_8x8[0]->mbmi.sb_type = bsize;
+  if (!frame_is_intra_only(cm)) {
+    vp9_pick_inter_mode(cpi, x, tile, mi_row, mi_col,
+                        rate, dist, bsize);
+  } else {
+    MB_PREDICTION_MODE intramode = DC_PRED;
+    set_mode_info(&xd->mi_8x8[0]->mbmi, bsize, intramode);
+  }
+  duplicate_modeinfo_in_sb(cm, xd, mi_row, mi_col, bsize);
+}
+
+static void nonrd_use_fixed_partition(VP9_COMP *cpi,
+                                      const TileInfo *const tile,
+                                      TOKENEXTRA **tp,
+                                      int mi_row, int mi_col,
+                                      BLOCK_SIZE bsize,
+                                      int *rate, int64_t *dist) {
   int br, bc;
-  MB_PREDICTION_MODE mode = DC_PRED;
   int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
   int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
@@ -2352,29 +2707,148 @@ static void nonrd_use_partition(VP9_COMP *cpi, const TileInfo *const tile,
 
   // find prediction mode for each 8x8 block
   for (br = 0; br < rows; br += bh) {
     for (bc = 0; bc < cols; bc += bw) {
-      const int row = mi_row + br;
-      const int col = mi_col + bc;
-      const BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
-                                                &bh, &bw);
-      int i, j;
+      int row = mi_row + br;
+      int col = mi_col + bc;
 
-      set_offsets(cpi, tile, row, col, bs);
-
-      if (cm->frame_type != KEY_FRAME)
-        vp9_pick_inter_mode(cpi, x, tile, row, col, &brate, &bdist, bs);
-      else
-        set_mode_info(&xd->mi_8x8[0]->mbmi, bs, mode);
+      BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
+                                          &bh, &bw);
+      nonrd_pick_sb_modes(cpi, tile, row, col, &brate, &bdist, bs);
 
       *rate += brate;
       *dist += bdist;
+    }
+  }
+  encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+}
 
-      for (j = 0; j < bh; ++j)
-        for (i = 0; i < bw; ++i)
-          xd->mi_8x8[j * cm->mode_info_stride + i] = xd->mi_8x8[0];
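+// Note: unlike nonrd_use_fixed_partition() above, this helper only records
+// the chosen block sizes in the mi grid; no mode search or encoding is
+// performed here.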
+static void nonrd_pick_fixed_partition(VP9_COMP *cpi,
+                                       const TileInfo *const tile,
+                                       int mi_row, int mi_col,
+                                       BLOCK_SIZE bsize) {
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  int br, bc;
+  int rows = MIN(MI_BLOCK_SIZE, tile->mi_row_end - mi_row);
+  int cols = MIN(MI_BLOCK_SIZE, tile->mi_col_end - mi_col);
+
+  int bw = num_8x8_blocks_wide_lookup[bsize];
+  int bh = num_8x8_blocks_high_lookup[bsize];
+
+  // Set the partition size for each 8x8 block.
+  for (br = 0; br < rows; br += bh) {
+    for (bc = 0; bc < cols; bc += bw) {
+      int row = mi_row + br;
+      int col = mi_col + bc;
+
+      BLOCK_SIZE bs = find_partition_size(bsize, rows - br, cols - bc,
+                                          &bh, &bw);
+      set_offsets(cpi, tile, row, col, bs);
+      xd->mi_8x8[0]->mbmi.sb_type = bs;
+      duplicate_modeinfo_in_sb(&cpi->common, xd, row, col, bs);
     }
   }
 }
 
+static void nonrd_use_partition(VP9_COMP *cpi,
+                                const TileInfo *const tile,
+                                MODE_INFO **mi_8x8,
+                                TOKENEXTRA **tp,
+                                int mi_row, int mi_col,
+                                BLOCK_SIZE bsize,
+                                int *totrate, int64_t *totdist) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &cpi->mb;
+  const int bsl = b_width_log2(bsize), hbs = (1 << bsl) / 4;
+  const int mis = cm->mode_info_stride;
+  PARTITION_TYPE partition;
+  BLOCK_SIZE subsize;
+  int rate;
+  int64_t dist;
+
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  // The partition was already chosen (e.g. by choose_partitioning()) and
+  // stored in the mi grid; read the stored block size back and infer the
+  // partition type from it.
+  if (bsize >= BLOCK_8X8) {
+    subsize = mi_8x8[0]->mbmi.sb_type;
+  } else {
+    subsize = BLOCK_4X4;
+  }
+
+  partition = partition_lookup[bsl][subsize];
+
+  switch (partition) {
+    case PARTITION_NONE:
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+      break;
+    case PARTITION_VERT:
+      *get_sb_index(x, subsize) = 0;
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+      if (mi_col + hbs < cm->mi_cols) {
+        *get_sb_index(x, subsize) = 1;
+        nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col + hbs,
+                            &rate, &dist, subsize);
+        if (rate != INT_MAX && dist != INT64_MAX &&
+            *totrate != INT_MAX && *totdist != INT64_MAX) {
+          *totrate += rate;
+          *totdist += dist;
+        }
+      }
+      break;
+    case PARTITION_HORZ:
+      *get_sb_index(x, subsize) = 0;
+      nonrd_pick_sb_modes(cpi, tile, mi_row, mi_col, totrate, totdist, subsize);
+      if (mi_row + hbs < cm->mi_rows) {
+        *get_sb_index(x, subsize) = 1;
+        nonrd_pick_sb_modes(cpi, tile, mi_row + hbs, mi_col,
+                            &rate, &dist, subsize);
+        if (rate != INT_MAX && dist != INT64_MAX &&
+            *totrate != INT_MAX && *totdist != INT64_MAX) {
+          *totrate += rate;
+          *totdist += dist;
+        }
+      }
+      break;
+    case PARTITION_SPLIT:
+      subsize = get_subsize(bsize, PARTITION_SPLIT);
+
+      *get_sb_index(x, subsize) = 0;
+      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col,
+                          subsize, totrate, totdist);
+      *get_sb_index(x, subsize) = 1;
+      nonrd_use_partition(cpi, tile, mi_8x8 + hbs, tp,
+                          mi_row, mi_col + hbs, subsize,
+                          &rate, &dist);
+      if (rate != INT_MAX && dist != INT64_MAX &&
+          *totrate != INT_MAX && *totdist != INT64_MAX) {
+        *totrate += rate;
+        *totdist += dist;
+      }
+      *get_sb_index(x, subsize) = 2;
+      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis, tp,
+                          mi_row + hbs, mi_col, subsize,
+                          &rate, &dist);
+      if (rate != INT_MAX && dist != INT64_MAX &&
+          *totrate != INT_MAX && *totdist != INT64_MAX) {
+        *totrate += rate;
+        *totdist += dist;
+      }
+      *get_sb_index(x, subsize) = 3;
+      nonrd_use_partition(cpi, tile, mi_8x8 + hbs * mis + hbs, tp,
+                          mi_row + hbs, mi_col + hbs, subsize,
+                          &rate, &dist);
+      if (rate != INT_MAX && dist != INT64_MAX &&
+          *totrate != INT_MAX && *totdist != INT64_MAX) {
+        *totrate += rate;
+        *totdist += dist;
+      }
+      break;
+    default:
+      assert(0 && "Invalid partition type.");
+  }
+
+  if (bsize == BLOCK_64X64)
+    encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, bsize);
+}
+
 static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
                                 int mi_row, TOKENEXTRA **tp) {
   int mi_col;
@@ -2392,21 +2866,23 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
     cpi->mb.source_variance = UINT_MAX;
     if (cpi->sf.partition_search_type == FIXED_PARTITION) {
-      nonrd_use_partition(cpi, tile, tp, mi_row, mi_col,
-                          cpi->sf.always_this_block_size,
-                          &dummy_rate, &dummy_dist);
-      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
-    } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION ||
-               cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
-      // TODO(debargha): Implement VAR_BASED_PARTITION as a separate case.
-      // Currently both VAR_BASED_FIXED_PARTITION/VAR_BASED_PARTITION
-      // map to the same thing.
+      nonrd_use_fixed_partition(cpi, tile, tp, mi_row, mi_col,
+                                cpi->sf.always_this_block_size,
+                                &dummy_rate, &dummy_dist);
+    } else if (cpi->sf.partition_search_type == VAR_BASED_FIXED_PARTITION) {
       BLOCK_SIZE bsize = get_nonrd_var_based_fixed_partition(cpi, mi_row,
                                                              mi_col);
-      nonrd_use_partition(cpi, tile, tp, mi_row, mi_col,
-                          bsize, &dummy_rate, &dummy_dist);
-      encode_sb_rt(cpi, tile, tp, mi_row, mi_col, 1, BLOCK_64X64);
+      nonrd_use_fixed_partition(cpi, tile, tp, mi_row, mi_col,
+                                bsize, &dummy_rate, &dummy_dist);
+    } else if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
+      const int idx_str = cpi->common.mode_info_stride * mi_row + mi_col;
+      MODE_INFO **mi_8x8 = cpi->common.mi_grid_visible + idx_str;
+      int dummy_rate;
+      int64_t dummy_dist;
+      choose_partitioning(cpi, tile, mi_row, mi_col);
+      nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
+                          &dummy_rate, &dummy_dist);
     } else {
       assert(0);
     }
diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c
index 26f1a02..1b28dee 100644
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -866,7 +866,7 @@ int vp9_square_search(const MACROBLOCK *x,
                       do_init_search, 0, vfp, use_mvcost,
                       center_mv, best_mv,
                       square_num_candidates, square_candidates);
-};
+}
 
 int vp9_fast_hex_search(const MACROBLOCK *x,
                         MV *ref_mv,
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index 3775a42..1c8b0bf 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -874,7 +874,7 @@ static void set_rt_speed_feature(VP9_COMMON *cm,
   if (speed >= 8) {
     int i;
     for (i = 0; i < BLOCK_SIZES; ++i)
-      sf->disable_inter_mode_mask[i] = 14; // only search NEARESTMV (0)
+      sf->disable_inter_mode_mask[i] = 14;  // only search NEARESTMV (0)
   }
 }
 
diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c
index 146e23d..18edbb0 100644
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -250,7 +250,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   x->skip = 0;
   if (!x->in_active_map)
     x->skip = 1;
-  // initialize mode decisions
   *returnrate = INT_MAX;
   *returndistortion = INT64_MAX;
-- 
2.7.4