From: John Koleszar Date: Sat, 13 Apr 2013 00:19:57 +0000 (-0700) Subject: make buid_inter_predictors block size agnostic (luma) X-Git-Tag: v1.3.0~1106^2~199^2~2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=4924934d2bf860a13ebd8b3f28fe78ea7d9ccb25;p=platform%2Fupstream%2Flibvpx.git make buid_inter_predictors block size agnostic (luma) This commit converts the luma versions of vp9_build_inter_predictors_sb to use a common function. Update the convolution functions to support block sizes larger than 16x16, and add a foreach_predicted_block walker. Next step will be to calculate the UV motion vector and implement SBUV, then fold in vp9_build_inter16x16_predictors_mb and SPLITMV. At the 16x16, 32x32, and 64x64 levels implemented in this commit, each plane is predicted with only a single call to vp9_build_inter_predictor. This is not yet called for SPLITMV. If the notion of SPLITMV/I8X8/I4X4 goes away, then the prediction block walker can go away, since we'll always predict the whole bsize in a single step. Implemented using a block walker at this stage for SPLITMV, as a 4x4 "prediction block size" within the BLOCK_SIZE_MB16X16 macroblock. It would also support other rectangular sizes too, if the blocks smaller than 16x16 remain implemented as a SPLITMV-like thing. Just using 4x4 for now. There's also a potential to combine with the foreach_transformed_block walker if the logic for calculating the size of the subsampled transform is made more straightforward, perhaps as a consequence of supporing smaller macroblocks than 16x16. Will watch what happens there. Change-Id: Iddd9973398542216601b630c628b9b7fdee33fe2 --- diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 7eb9f8e..dd95761 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -875,4 +875,62 @@ static INLINE void foreach_transformed_block_uv( } } +// TODO(jkoleszar): In principle, pred_w, pred_h are unnecessary, as we could +// calculate the subsampled BLOCK_SIZE_TYPE, but that type isn't defined for +// sizes smaller than 16x16 yet. +typedef void (*foreach_predicted_block_visitor)(int plane, int block, + BLOCK_SIZE_TYPE bsize, + int pred_w, int pred_h, + void *arg); +static INLINE void foreach_predicted_block_in_plane( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, int plane, + foreach_predicted_block_visitor visit, void *arg) { + const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); + + // block sizes in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + const int block_size_b = bw + bh; + + // subsampled size of the block + const int ss_sum = xd->plane[plane].subsampling_x + + xd->plane[plane].subsampling_y; + const int ss_block_size = block_size_b - ss_sum; + + // size of the predictor to use. + // TODO(jkoleszar): support I8X8, I4X4 + const int pred_w = bw - xd->plane[plane].subsampling_x; + const int pred_h = bh - xd->plane[plane].subsampling_y; + const int pred_b = mode == SPLITMV ? 0 : pred_w + pred_h; + const int step = 1 << pred_b; + + int i; + + assert(pred_b <= block_size_b); + assert(pred_b == ss_block_size); + for (i = 0; i < (1 << ss_block_size); i += step) { + visit(plane, i, bsize, pred_w, pred_h, arg); + } +} +static INLINE void foreach_predicted_block( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, + foreach_predicted_block_visitor visit, void *arg) { + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; plane++) { + foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg); + } +} +static INLINE void foreach_predicted_block_uv( + const MACROBLOCKD* const xd, BLOCK_SIZE_TYPE bsize, + foreach_predicted_block_visitor visit, void *arg) { + int plane; + + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + foreach_predicted_block_in_plane(xd, bsize, plane, visit, arg); + } +} + + + #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 71be77d..002e6eb 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -527,6 +527,92 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; } +#if !CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT +// TODO(jkoleszar): yet another mv clamping function :-( +MV clamp_mv_to_umv_border_sb(const MV *src_mv, + int bwl, int bhl, + int mb_to_left_edge, int mb_to_top_edge, + int mb_to_right_edge, int mb_to_bottom_edge) { + /* If the MV points so far into the UMV border that no visible pixels + * are used for reconstruction, the subpel part of the MV can be + * discarded and the MV limited to 16 pixels with equivalent results. + */ + const int epel_left = (VP9_INTERP_EXTEND + (4 << bwl)) << 3; + const int epel_right = epel_left - (1 << 3); + const int epel_top = (VP9_INTERP_EXTEND + (4 << bhl)) << 3; + const int epel_bottom = epel_top - (1 << 3); + MV clamped_mv; + clamped_mv.col = clamp(src_mv->col, + mb_to_left_edge - epel_left, + mb_to_right_edge + epel_right); + clamped_mv.row = clamp(src_mv->row, + mb_to_top_edge - epel_top, + mb_to_bottom_edge + epel_bottom); + return clamped_mv; +} + +struct build_inter_predictors_args { + MACROBLOCKD *xd; + uint8_t* dst[MAX_MB_PLANE]; + int dst_stride[MAX_MB_PLANE]; + int x; + int y; +}; +static void build_inter_predictors(int plane, int block, + BLOCK_SIZE_TYPE bsize, + int pred_w, int pred_h, + void *argv) { + const struct build_inter_predictors_args* const arg = argv; + const int bwl = pred_w, bw = 4 << bwl; + const int bhl = pred_h, bh = 4 << bhl; + const int x_idx = block & ((1 << bwl) - 1), y_idx = block >> bwl; + const int x = x_idx * 4, y = y_idx * 4; + MACROBLOCKD * const xd = arg->xd; + const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; + int which_mv; + + for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { + const MV* const mv = (xd->mode_info_context->mbmi.mode == SPLITMV) + ? &xd->block[block].bmi.as_mv[which_mv].as_mv + : &xd->mode_info_context->mbmi.mv[which_mv].as_mv; + + const uint8_t * const base_pre = which_mv ? xd->second_pre.y_buffer + : xd->pre.y_buffer; + const int pre_stride = which_mv ? xd->second_pre.y_stride + : xd->pre.y_stride; + const uint8_t *const pre = base_pre + + scaled_buffer_offset(x, y, pre_stride, &xd->scale_factor[which_mv]); + struct scale_factors * const scale = + plane == 0 ? &xd->scale_factor[which_mv] : &xd->scale_factor_uv[which_mv]; + + int_mv clamped_mv; + clamped_mv.as_mv = clamp_mv_to_umv_border_sb(mv, bwl, bhl, + xd->mb_to_left_edge, + xd->mb_to_top_edge, + xd->mb_to_right_edge, + xd->mb_to_bottom_edge); + + scale->set_scaled_offsets(scale, arg->y + y, arg->x + x); + + vp9_build_inter_predictor(pre, pre_stride, + arg->dst[plane], arg->dst_stride[plane], + &clamped_mv, &xd->scale_factor[which_mv], + bw, bh, which_mv, &xd->subpix); + } +} +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, + uint8_t *dst_y, + int dst_ystride, + int mb_row, + int mb_col, + BLOCK_SIZE_TYPE bsize) { + struct build_inter_predictors_args args = { + xd, {dst_y, NULL, NULL}, {dst_ystride, 0, 0}, mb_col * 16, mb_row * 16 + }; + foreach_predicted_block_in_plane(xd, bsize, 0, build_inter_predictors, &args); +} +#endif + #define AVERAGE_WEIGHT (1 << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT)) #if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT @@ -867,49 +953,6 @@ static void build_inter16x16_predictors_mby_w(MACROBLOCKD *xd, which_mv ? weight : 0, &xd->subpix); } } - -void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - int weight = get_implicit_compoundinter_weight(xd, mb_row, mb_col); - - build_inter16x16_predictors_mby_w(xd, dst_y, dst_ystride, weight, - mb_row, mb_col); -} - -#else - -void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col) { - const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; - int which_mv; - - for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { - const int clamp_mvs = which_mv ? - xd->mode_info_context->mbmi.need_to_clamp_secondmv : - xd->mode_info_context->mbmi.need_to_clamp_mvs; - - uint8_t *base_pre = which_mv ? xd->second_pre.y_buffer : xd->pre.y_buffer; - int pre_stride = which_mv ? xd->second_pre.y_stride : xd->pre.y_stride; - int_mv ymv; - struct scale_factors *scale = &xd->scale_factor[which_mv]; - - ymv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int; - - if (clamp_mvs) - clamp_mv_to_umv_border(&ymv.as_mv, xd); - - scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16); - - vp9_build_inter_predictor(base_pre, pre_stride, dst_y, dst_ystride, - &ymv, scale, 16, 16, which_mv, &xd->subpix); - } -} #endif #if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT @@ -1109,64 +1152,6 @@ void vp9_build_inter_predictors_sby(MACROBLOCKD *x, build_inter_predictors_sby_w(x, dst_y, dst_ystride, weight, mb_row, mb_col, bsize); } - -#else - -// TODO(jingning): vp9_convolve8_ssse3_ limits the dimension up to 16. Currently -// handle inter prediction of block sizes above 16x16 separately from those -// smaller ones. Need to combine them all in to a unified inter prediction -// function. -void vp9_build_inter_predictors_sby(MACROBLOCKD *x, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col, - BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize), bw = 1 << bwl; - const int bhl = mb_height_log2(bsize), bh = 1 << bhl; - uint8_t *y1 = x->pre.y_buffer; - uint8_t *y2 = x->second_pre.y_buffer; - int edge[4], n; - - edge[0] = x->mb_to_top_edge; - edge[1] = x->mb_to_bottom_edge; - edge[2] = x->mb_to_left_edge; - edge[3] = x->mb_to_right_edge; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - x->mb_to_top_edge = edge[0] - ((y_idx * 16) << 3); - x->mb_to_bottom_edge = edge[1] + (((bh - 1 - y_idx) * 16) << 3); - x->mb_to_left_edge = edge[2] - ((x_idx * 16) << 3); - x->mb_to_right_edge = edge[3] + (((bw - 1 - x_idx) * 16) << 3); - - x->pre.y_buffer = y1 + scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->pre.y_stride, - &x->scale_factor[0]); - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2 + - scaled_buffer_offset(x_idx * 16, - y_idx * 16, - x->second_pre.y_stride, - &x->scale_factor[1]); - } - vp9_build_inter16x16_predictors_mby(x, - dst_y + y_idx * 16 * dst_ystride + x_idx * 16, - dst_ystride, mb_row + y_idx, mb_col + x_idx); - } - x->mb_to_top_edge = edge[0]; - x->mb_to_bottom_edge = edge[1]; - x->mb_to_left_edge = edge[2]; - x->mb_to_right_edge = edge[3]; - - x->pre.y_buffer = y1; - if (x->mode_info_context->mbmi.second_ref_frame > 0) { - x->second_pre.y_buffer = y2; - } -} - #endif #if CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 533d304..77fa9ab 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -16,12 +16,6 @@ struct subpix_fn_table; -void vp9_build_inter16x16_predictors_mby(MACROBLOCKD *xd, - uint8_t *dst_y, - int dst_ystride, - int mb_row, - int mb_col); - void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd, uint8_t *dst_u, uint8_t *dst_v, diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index bb6ba70..ae263ca 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -524,6 +524,7 @@ void vp9_first_pass(VP9_COMP *cpi) { x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP9BORDERINPIXELS - 16); + set_mb_row(cm, xd, mb_row, 1 << mb_height_log2(BLOCK_SIZE_MB16X16)); // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { @@ -531,6 +532,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int gf_motion_error = INT_MAX; int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); + set_mb_col(cm, xd, mb_col, 1 << mb_height_log2(BLOCK_SIZE_MB16X16)); xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset; xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;