From 25e55526301eba7d6e5c68e25402e9b2102976d8 Mon Sep 17 00:00:00 2001 From: hkuang Date: Thu, 12 Dec 2013 20:33:06 -0800 Subject: [PATCH] Remove border extension in intra frame prediction. Change-Id: Id677df4d3dbbed6fdf7319ca6464f19cf32c8176 --- vp9/common/vp9_blockd.h | 41 ++-------------- vp9/common/vp9_reconintra.c | 107 +++++++++++++++++++++++++++++++++++------- vp9/common/vp9_reconintra.h | 3 +- vp9/decoder/vp9_decodeframe.c | 7 ++- vp9/encoder/vp9_encodeframe.c | 3 ++ vp9/encoder/vp9_encodemb.c | 11 ++--- vp9/encoder/vp9_mbgraph.c | 3 +- vp9/encoder/vp9_rdopt.c | 2 +- 8 files changed, 107 insertions(+), 70 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 93f96c8..ead4661 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -238,6 +238,9 @@ typedef struct macroblockd { /* pointers to reference frames */ const YV12_BUFFER_CONFIG *ref_buf[2]; + /* pointer to current frame */ + const YV12_BUFFER_CONFIG *cur_buf; + int lossless; /* Inverse transform function pointers. */ void (*itxm_add)(const int16_t *input, uint8_t *dest, int stride, int eob); @@ -409,44 +412,6 @@ static void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, *y = (raster_mb >> tx_cols_log2) << tx_size; } -static void extend_for_intra(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize, - int plane, int aoff, int loff) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - uint8_t *const buf = pd->dst.buf; - const int stride = pd->dst.stride; - const int x = aoff * 4 - 1; - const int y = loff * 4 - 1; - // Copy a pixel into the umv if we are in a situation where the block size - // extends into the UMV. - // TODO(JBB): Should be able to do the full extend in place so we don't have - // to do this multiple times. - if (xd->mb_to_right_edge < 0) { - const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - const int umv_border_start = bw + (xd->mb_to_right_edge >> - (3 + pd->subsampling_x)); - - if (x + bw > umv_border_start) - vpx_memset(&buf[y * stride + umv_border_start], - buf[y * stride + umv_border_start - 1], bw); - } - - if (xd->mb_to_bottom_edge < 0) { - if (xd->left_available || x >= 0) { - const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; - const int umv_border_start = - bh + (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)); - - if (y + bh > umv_border_start) { - const uint8_t c = buf[(umv_border_start - 1) * stride + x]; - uint8_t *d = &buf[umv_border_start * stride + x]; - int i; - for (i = 0; i < bh; ++i, d += stride) - *d = c; - } - } - } -} - static void set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff) { diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index eb643b0..96ba3e4 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -313,17 +313,21 @@ static void init_intra_pred_fn_ptrs(void) { #undef intra_pred_allsizes } -static void build_intra_predictors(const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride, +static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, + int ref_stride, uint8_t *dst, int dst_stride, MB_PREDICTION_MODE mode, TX_SIZE tx_size, int up_available, int left_available, - int right_available) { + int right_available, int x, int y, + int plane) { int i; DECLARE_ALIGNED_ARRAY(16, uint8_t, left_col, 64); DECLARE_ALIGNED_ARRAY(16, uint8_t, above_data, 128 + 16); uint8_t *above_row = above_data + 16; const uint8_t *const_above_row = above_row; const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z @@ -334,26 +338,90 @@ static void build_intra_predictors(const uint8_t *ref, int ref_stride, once(init_intra_pred_fn_ptrs); + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + // left if (left_available) { - for (i = 0; i < bs; i++) - left_col[i] = ref[i * ref_stride - 1]; + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } } else { vpx_memset(left_col, 129, bs); } + // TODO(hkuang) do not extend 2*bs pixels for all modes. // above if (up_available) { const uint8_t *above_ref = ref - ref_stride; - if (bs == 4 && right_available && left_available) { - const_above_row = above_ref; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + vpx_memcpy(above_row - 1, above_ref - 1, 2 * bs + 1); + } else { + vpx_memcpy(above_row - 1, above_ref - 1, bs + 1); + vpx_memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + vpx_memcpy(above_row - 1, above_ref - 1, r + 1); + vpx_memset(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + vpx_memcpy(above_row - 1, above_ref - 1, bs + 1); + vpx_memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + vpx_memcpy(above_row - 1, above_ref - 1, r + 1); + vpx_memset(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + vpx_memcpy(above_row - 1, above_ref - 1, r + 1); + vpx_memset(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } + above_row[-1] = left_available ? above_ref[-1] : 129; + } } else { - vpx_memcpy(above_row, above_ref, bs); - if (bs == 4 && right_available) - vpx_memcpy(above_row + bs, above_ref + bs, bs); - else - vpx_memset(above_row + bs, above_row[bs - 1], bs); - above_row[-1] = left_available ? above_ref[-1] : 129; + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + vpx_memcpy(above_row, above_ref, bs); + if (bs == 4 && right_available) + vpx_memcpy(above_row + bs, above_ref + bs, bs); + else + vpx_memset(above_row + bs, above_row[bs - 1], bs); + above_row[-1] = left_available ? above_ref[-1] : 129; + } } } else { vpx_memset(above_row, 127, bs * 2); @@ -370,16 +438,19 @@ static void build_intra_predictors(const uint8_t *ref, int ref_stride, } void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, - TX_SIZE tx_size, int mode, - const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride) { + TX_SIZE tx_size, int mode, + const uint8_t *ref, int ref_stride, + uint8_t *dst, int dst_stride, + int aoff, int loff, int plane) { const int bwl = bwl_in - tx_size; const int wmask = (1 << bwl) - 1; const int have_top = (block_idx >> bwl) || xd->up_available; const int have_left = (block_idx & wmask) || xd->left_available; const int have_right = ((block_idx & wmask) != wmask); + const int x = aoff * 4; + const int y = loff * 4; assert(bwl >= 0); - build_intra_predictors(ref, ref_stride, dst, dst_stride, mode, tx_size, - have_top, have_left, have_right); + build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, + have_top, have_left, have_right, x, y, plane); } diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index 6e3f55c..fc916fc 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -17,5 +17,6 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int block_idx, int bwl_in, TX_SIZE tx_size, int mode, const uint8_t *ref, int ref_stride, - uint8_t *dst, int dst_stride); + uint8_t *dst, int dst_stride, + int aoff, int loff, int plane); #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 56b05ce..57b29ce 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -305,12 +305,10 @@ static void predict_and_reconstruct_intra_block(int plane, int block, txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); dst = &pd->dst.buf[4 * y * pd->dst.stride + 4 * x]; - if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) - extend_for_intra(xd, plane_bsize, plane, x, y); - vp9_predict_intra_block(xd, block >> (tx_size << 1), b_width_log2(plane_bsize), tx_size, mode, - dst, pd->dst.stride, dst, pd->dst.stride); + dst, pd->dst.stride, dst, pd->dst.stride, + x, y, plane); if (!mi->mbmi.skip_coeff) { const int eob = vp9_decode_block_tokens(cm, xd, plane, block, @@ -1333,6 +1331,7 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { const int tile_rows = 1 << cm->log2_tile_rows; const int tile_cols = 1 << cm->log2_tile_cols; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + xd->cur_buf = new_fb; if (!first_partition_size) { // showing a frame directly diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 21cace6..f703ca3 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -548,6 +548,9 @@ void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, src->alpha_stride}; int i; + // Set current frame pointer. + x->e_mbd.cur_buf = src; + for (i = 0; i < MAX_MB_PLANE; i++) setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, NULL, x->e_mbd.plane[i].subsampling_x, diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index c6b1268..11ebd3c 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -547,9 +547,6 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, src = &p->src.buf[4 * (j * p->src.stride + i)]; src_diff = &p->src_diff[4 * (j * diff_stride + i)]; - if (xd->mb_to_right_edge < 0 || xd->mb_to_bottom_edge < 0) - extend_for_intra(xd, plane_bsize, plane, i, j); - // if (x->optimize) // vp9_optimize_b(plane, block, plane_bsize, tx_size, x, args->ctx); @@ -560,7 +557,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_predict_intra_block(xd, block >> 6, bwl, TX_32X32, mode, x->skip_encode ? src : dst, x->skip_encode ? p->src.stride : pd->dst.stride, - dst, pd->dst.stride); + dst, pd->dst.stride, i, j, plane); if (!x->skip_recode) { vp9_subtract_block(32, 32, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); @@ -583,7 +580,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_predict_intra_block(xd, block >> 4, bwl, TX_16X16, mode, x->skip_encode ? src : dst, x->skip_encode ? p->src.stride : pd->dst.stride, - dst, pd->dst.stride); + dst, pd->dst.stride, i, j, plane); if (!x->skip_recode) { vp9_subtract_block(16, 16, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); @@ -602,7 +599,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_predict_intra_block(xd, block >> 2, bwl, TX_8X8, mode, x->skip_encode ? src : dst, x->skip_encode ? p->src.stride : pd->dst.stride, - dst, pd->dst.stride); + dst, pd->dst.stride, i, j, plane); if (!x->skip_recode) { vp9_subtract_block(8, 8, src_diff, diff_stride, src, p->src.stride, dst, pd->dst.stride); @@ -625,7 +622,7 @@ void vp9_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, vp9_predict_intra_block(xd, block, bwl, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? p->src.stride : pd->dst.stride, - dst, pd->dst.stride); + dst, pd->dst.stride, i, j, plane); if (!x->skip_recode) { vp9_subtract_block(4, 4, src_diff, diff_stride, diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index e2ef256..09c3f0e 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -152,7 +152,8 @@ static int find_best_16x16_intra(VP9_COMP *cpi, xd->mi_8x8[0]->mbmi.mode = mode; vp9_predict_intra_block(xd, 0, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); + xd->plane[0].dst.buf, xd->plane[0].dst.stride, + 0, 0, 0); err = vp9_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, best_err); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9bade98..cde4bb8 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1042,7 +1042,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, - dst, dst_stride); + dst, dst_stride, idx, idy, 0); vp9_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride); -- 2.7.4