From c3941665e995f12f9aa9b47a32c06d20978993fc Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Sat, 5 Jan 2013 18:20:25 -0800 Subject: [PATCH] 64x64 blocksize support. 3.2% gains on std/hd, 1.0% gains on hd. Change-Id: I481d5df23d8a4fc650a5bcba956554490b2bd200 --- configure | 1 + vp9/common/vp9_blockd.h | 21 +- vp9/common/vp9_common.h | 3 + vp9/common/vp9_findnearmv.c | 62 +- vp9/common/vp9_loopfilter.c | 8 +- vp9/common/vp9_mvref_common.c | 2 +- vp9/common/vp9_onyxc_int.h | 7 +- vp9/common/vp9_pred_common.c | 78 +- vp9/common/vp9_reconinter.c | 64 ++ vp9/common/vp9_reconinter.h | 7 + vp9/common/vp9_reconintra.c | 85 +- vp9/common/vp9_reconintra.h | 6 + vp9/common/vp9_reconintra4x4.c | 16 +- vp9/common/vp9_rtcd_defs.sh | 30 + vp9/decoder/vp9_decodemv.c | 98 ++- vp9/decoder/vp9_decodframe.c | 561 ++++++++----- vp9/encoder/vp9_bitstream.c | 797 +++++++++--------- vp9/encoder/vp9_block.h | 7 +- vp9/encoder/vp9_encodeframe.c | 1728 ++++++++++++++++++++++++---------------- vp9/encoder/vp9_mcomp.c | 4 +- vp9/encoder/vp9_onyx_if.c | 60 +- vp9/encoder/vp9_onyx_int.h | 12 +- vp9/encoder/vp9_rdopt.c | 682 +++++++++++++--- vp9/encoder/vp9_rdopt.h | 21 +- vp9/encoder/vp9_sad_c.c | 67 ++ vp9/encoder/vp9_segmentation.c | 155 ++-- vp9/encoder/vp9_variance_c.c | 74 ++ 27 files changed, 2971 insertions(+), 1685 deletions(-) diff --git a/configure b/configure index 1126ea8..5ed688e 100755 --- a/configure +++ b/configure @@ -240,6 +240,7 @@ EXPERIMENT_LIST=" csm comp_intra_pred superblocks + superblocks64 pred_filter lossless subpelrefmv diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index c430ea2..9ca2b22 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -226,6 +226,16 @@ typedef enum { MAX_REF_FRAMES = 4 } MV_REFERENCE_FRAME; +#if CONFIG_SUPERBLOCKS +typedef enum { + BLOCK_SIZE_MB16X16 = 0, + BLOCK_SIZE_SB32X32 = 1, +#if CONFIG_SUPERBLOCKS64 + BLOCK_SIZE_SB64X64 = 2, +#endif +} BLOCK_SIZE_TYPE; +#endif + typedef struct { MB_PREDICTION_MODE mode, uv_mode; #if CONFIG_COMP_INTRA_PRED @@ -268,8 +278,8 @@ typedef struct { #if CONFIG_SUPERBLOCKS // FIXME need a SB array of 4 MB_MODE_INFOs that - // only needs one encoded_as_sb. - unsigned char encoded_as_sb; + // only needs one sb_type. 
+ BLOCK_SIZE_TYPE sb_type; #endif } MB_MODE_INFO; @@ -415,6 +425,7 @@ typedef struct macroblockd { DECLARE_ALIGNED(32, uint8_t, y_buf[22 * 32]); #endif + int sb_index; int mb_index; // Index of the MB in the SB (0..3) int q_index; @@ -519,7 +530,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; #if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks - if (xd->mode_info_context->mbmi.encoded_as_sb) + if (xd->mode_info_context->mbmi.sb_type) return tx_type; #endif if (xd->mode_info_context->mbmi.mode == B_PRED && @@ -576,7 +587,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; #if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks - if (xd->mode_info_context->mbmi.encoded_as_sb) + if (xd->mode_info_context->mbmi.sb_type) return tx_type; #endif if (xd->mode_info_context->mbmi.mode == I8X8_PRED && @@ -611,7 +622,7 @@ static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, const BLOCKD *b) { return tx_type; #if CONFIG_SUPERBLOCKS // TODO(rbultje, debargha): Explore ADST usage for superblocks - if (xd->mode_info_context->mbmi.encoded_as_sb) + if (xd->mode_info_context->mbmi.sb_type) return tx_type; #endif if (xd->mode_info_context->mbmi.mode < I8X8_PRED && diff --git a/vp9/common/vp9_common.h b/vp9/common/vp9_common.h index ee02758..2e1ee4b 100644 --- a/vp9/common/vp9_common.h +++ b/vp9/common/vp9_common.h @@ -21,6 +21,9 @@ #define TRUE 1 #define FALSE 0 +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) + /* Only need this for fixed-size arrays, for structs just assign. */ #define vp9_copy(Dest, Src) { \ diff --git a/vp9/common/vp9_findnearmv.c b/vp9/common/vp9_findnearmv.c index b5d6bda..85982fc 100644 --- a/vp9/common/vp9_findnearmv.c +++ b/vp9/common/vp9_findnearmv.c @@ -191,7 +191,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src, xd->dst.y_stride, &sse); score += sse; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { vp9_sub_pixel_variance16x2_c(above_ref + offset + 16, ref_y_stride, SP(this_mv.as_mv.col), @@ -199,6 +199,22 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, above_src + 16, xd->dst.y_stride, &sse); score += sse; } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + vp9_sub_pixel_variance16x2_c(above_ref + offset + 32, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 32, xd->dst.y_stride, &sse); + score += sse; + vp9_sub_pixel_variance16x2_c(above_ref + offset + 48, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + above_src + 48, xd->dst.y_stride, &sse); + score += sse; + } +#endif #endif } if (xd->left_available) { @@ -208,7 +224,7 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, left_src, xd->dst.y_stride, &sse); score += sse; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 16, ref_y_stride, SP(this_mv.as_mv.col), @@ -217,6 +233,24 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, xd->dst.y_stride, &sse); score += sse; } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 32, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + 
left_src + xd->dst.y_stride * 32, + xd->dst.y_stride, &sse); + score += sse; + vp9_sub_pixel_variance2x16_c(left_ref + offset + ref_y_stride * 48, + ref_y_stride, + SP(this_mv.as_mv.col), + SP(this_mv.as_mv.row), + left_src + xd->dst.y_stride * 48, + xd->dst.y_stride, &sse); + score += sse; + } +#endif #endif } #else @@ -230,22 +264,42 @@ void vp9_find_best_ref_mvs(MACROBLOCKD *xd, score += vp9_sad16x3(above_src, xd->dst.y_stride, above_ref + offset, ref_y_stride); #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { score += vp9_sad16x3(above_src + 16, xd->dst.y_stride, above_ref + offset + 16, ref_y_stride); } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + score += vp9_sad16x3(above_src + 32, xd->dst.y_stride, + above_ref + offset + 32, ref_y_stride); + score += vp9_sad16x3(above_src + 48, xd->dst.y_stride, + above_ref + offset + 48, ref_y_stride); + } +#endif #endif } if (xd->left_available) { score += vp9_sad3x16(left_src, xd->dst.y_stride, left_ref + offset, ref_y_stride); #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB32X32) { score += vp9_sad3x16(left_src + xd->dst.y_stride * 16, xd->dst.y_stride, left_ref + offset + ref_y_stride * 16, ref_y_stride); } +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type >= BLOCK_SIZE_SB64X64) { + score += vp9_sad3x16(left_src + xd->dst.y_stride * 32, + xd->dst.y_stride, + left_ref + offset + ref_y_stride * 32, + ref_y_stride); + score += vp9_sad3x16(left_src + xd->dst.y_stride * 48, + xd->dst.y_stride, + left_ref + offset + ref_y_stride * 48, + ref_y_stride); + } +#endif #endif } #endif diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c index a928a92..5188aa4 100644 --- a/vp9/common/vp9_loopfilter.c +++ b/vp9/common/vp9_loopfilter.c @@ -228,7 +228,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { if (mb_col > 0 #if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_col & 1) && mode_info_context->mbmi.sb_type && ((mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-1].mbmi.mb_skip_coeff) #if CONFIG_TX32X32 @@ -253,7 +253,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { /* don't apply across umv border */ if (mb_row > 0 #if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_row & 1) && mode_info_context->mbmi.sb_type && ((mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-mis].mbmi.mb_skip_coeff) #if CONFIG_TX32X32 @@ -277,7 +277,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { // FIXME: Not 8x8 aware if (mb_col > 0 #if CONFIG_SUPERBLOCKS - && !((mb_col & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_col & 1) && mode_info_context->mbmi.sb_type && mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-1].mbmi.mb_skip_coeff) #endif @@ -292,7 +292,7 @@ void vp9_loop_filter_frame(VP9_COMMON *cm, MACROBLOCKD *xd) { /* don't apply across umv border */ if (mb_row > 0 #if CONFIG_SUPERBLOCKS - && !((mb_row & 1) && mode_info_context->mbmi.encoded_as_sb && + && !((mb_row & 1) && mode_info_context->mbmi.sb_type && mode_info_context[0].mbmi.mb_skip_coeff && mode_info_context[-cm->mode_info_stride].mbmi.mb_skip_coeff) #endif diff --git a/vp9/common/vp9_mvref_common.c b/vp9/common/vp9_mvref_common.c index 4b576e8..bfdc1af 100644 --- 
a/vp9/common/vp9_mvref_common.c +++ b/vp9/common/vp9_mvref_common.c @@ -237,7 +237,7 @@ void vp9_find_mv_refs( vpx_memset(candidate_scores, 0, sizeof(candidate_scores)); #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { + if (mbmi->sb_type) { mv_ref_search = sb_mv_ref_search; ref_distance_weight = sb_ref_distance_weight; } else { diff --git a/vp9/common/vp9_onyxc_int.h b/vp9/common/vp9_onyxc_int.h index d96e76c..3b62dac 100644 --- a/vp9/common/vp9_onyxc_int.h +++ b/vp9/common/vp9_onyxc_int.h @@ -229,7 +229,7 @@ typedef struct VP9Common { /* Y,U,V,Y2 */ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ - ENTROPY_CONTEXT_PLANES left_context[2]; /* (up to) 4 contexts "" */ + ENTROPY_CONTEXT_PLANES left_context[4]; /* (up to) 4 contexts "" */ /* keyframe block modes are predicted by their above, left neighbors */ @@ -248,7 +248,10 @@ typedef struct VP9Common { vp9_prob prob_last_coded; vp9_prob prob_gf_coded; #if CONFIG_SUPERBLOCKS - vp9_prob sb_coded; + vp9_prob sb32_coded; +#if CONFIG_SUPERBLOCKS64 + vp9_prob sb64_coded; +#endif // CONFIG_SUPERBLOCKS64 #endif // Context probabilities when using predictive coding of segment id diff --git a/vp9/common/vp9_pred_common.c b/vp9/common/vp9_pred_common.c index e8a3c4f..f2f35a3 100644 --- a/vp9/common/vp9_pred_common.c +++ b/vp9/common/vp9_pred_common.c @@ -9,6 +9,7 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_treecoder.h" @@ -230,13 +231,18 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, case PRED_SEG_ID: xd->mode_info_context->mbmi.seg_id_predicted = pred_flag; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[1].mbmi.seg_id_predicted = pred_flag; - if (xd->mb_to_bottom_edge >= 0) { - xd->mode_info_context[mis].mbmi.seg_id_predicted = pred_flag; - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[mis + 1].mbmi.seg_id_predicted = pred_flag; + if (xd->mode_info_context->mbmi.sb_type) { +#define sub(a, b) (b) < 0 ? 
(a) + (b) : (a) + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); + const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7); + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + xd->mode_info_context[y * mis + x].mbmi.seg_id_predicted = + pred_flag; + } } } #endif @@ -245,13 +251,16 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, case PRED_REF: xd->mode_info_context->mbmi.ref_predicted = pred_flag; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[1].mbmi.ref_predicted = pred_flag; - if (xd->mb_to_bottom_edge >= 0) { - xd->mode_info_context[mis].mbmi.ref_predicted = pred_flag; - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[mis + 1].mbmi.ref_predicted = pred_flag; + if (xd->mode_info_context->mbmi.sb_type) { + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); + const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7); + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + xd->mode_info_context[y * mis + x].mbmi.ref_predicted = pred_flag; + } } } #endif @@ -260,13 +269,16 @@ void vp9_set_pred_flag(MACROBLOCKD *const xd, case PRED_MBSKIP: xd->mode_info_context->mbmi.mb_skip_coeff = pred_flag; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[1].mbmi.mb_skip_coeff = pred_flag; - if (xd->mb_to_bottom_edge >= 0) { - xd->mode_info_context[mis].mbmi.mb_skip_coeff = pred_flag; - if (xd->mb_to_right_edge >= 0) - xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = pred_flag; + if (xd->mode_info_context->mbmi.sb_type) { + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int x_mbs = sub(n_mbs, xd->mb_to_right_edge >> 7); + const int y_mbs = sub(n_mbs, xd->mb_to_bottom_edge >> 7); + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + xd->mode_info_context[y * mis + x].mbmi.mb_skip_coeff = pred_flag; + } } } #endif @@ -288,21 +300,25 @@ unsigned char vp9_get_pred_mb_segid(const VP9_COMMON *const cm, // Currently the prediction for the macroblock segment ID is // the value stored for this macroblock in the previous frame. 
#if CONFIG_SUPERBLOCKS - if (!xd->mode_info_context->mbmi.encoded_as_sb) { + if (!xd->mode_info_context->mbmi.sb_type) { #endif return cm->last_frame_seg_map[MbIndex]; #if CONFIG_SUPERBLOCKS } else { - int seg_id = cm->last_frame_seg_map[MbIndex]; - int mb_col = MbIndex % cm->mb_cols; - int mb_row = MbIndex / cm->mb_cols; - if (mb_col + 1 < cm->mb_cols) - seg_id = seg_id && cm->last_frame_seg_map[MbIndex + 1]; - if (mb_row + 1 < cm->mb_rows) { - seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols]; - if (mb_col + 1 < cm->mb_cols) - seg_id = seg_id && cm->last_frame_seg_map[MbIndex + cm->mb_cols + 1]; + const int n_mbs = 1 << xd->mode_info_context->mbmi.sb_type; + const int mb_col = MbIndex % cm->mb_cols; + const int mb_row = MbIndex / cm->mb_cols; + const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col); + const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row); + int x, y; + unsigned seg_id = -1; + + for (y = mb_row; y < mb_row + y_mbs; y++) { + for (x = mb_col; x < mb_col + x_mbs; x++) { + seg_id = MIN(seg_id, cm->last_frame_seg_map[cm->mb_cols * y + x]); + } } + return seg_id; } #endif diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c index 01d332f..c1d4a29 100644 --- a/vp9/common/vp9_reconinter.c +++ b/vp9/common/vp9_reconinter.c @@ -780,6 +780,70 @@ void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, } #endif } + +void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride) { + uint8_t *y1 = x->pre.y_buffer, *u1 = x->pre.u_buffer, *v1 = x->pre.v_buffer; + uint8_t *y2 = x->second_pre.y_buffer, *u2 = x->second_pre.u_buffer, + *v2 = x->second_pre.v_buffer; + int edge[4], n; + + edge[0] = x->mb_to_top_edge; + edge[1] = x->mb_to_bottom_edge; + edge[2] = x->mb_to_left_edge; + edge[3] = x->mb_to_right_edge; + + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + x->mb_to_top_edge = edge[0] - ((y_idx * 32) << 3); + x->mb_to_bottom_edge = edge[1] + (((1 - y_idx) * 32) << 3); + x->mb_to_left_edge = edge[2] - ((x_idx * 32) << 3); + x->mb_to_right_edge = edge[3] + (((1 - x_idx) * 32) << 3); + + x->pre.y_buffer = y1 + y_idx * 32 * x->pre.y_stride + x_idx * 32; + x->pre.u_buffer = u1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->pre.v_buffer = v1 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2 + y_idx * 32 * x->pre.y_stride + x_idx * 32; + x->second_pre.u_buffer = u2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + x->second_pre.v_buffer = v2 + y_idx * 16 * x->pre.uv_stride + x_idx * 16; + } + + vp9_build_inter32x32_predictors_sb(x, + dst_y + y_idx * 32 * dst_ystride + x_idx * 32, + dst_u + y_idx * 16 * dst_uvstride + x_idx * 16, + dst_v + y_idx * 16 * dst_uvstride + x_idx * 16, + dst_ystride, dst_uvstride); + } + + x->mb_to_top_edge = edge[0]; + x->mb_to_bottom_edge = edge[1]; + x->mb_to_left_edge = edge[2]; + x->mb_to_right_edge = edge[3]; + + x->pre.y_buffer = y1; + x->pre.u_buffer = u1; + x->pre.v_buffer = v1; + + if (x->mode_info_context->mbmi.second_ref_frame > 0) { + x->second_pre.y_buffer = y2; + x->second_pre.u_buffer = u2; + x->second_pre.v_buffer = v2; + } + +#if CONFIG_COMP_INTERINTRA_PRED + if (x->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_64x64_predictors_sb(x, dst_y, dst_u, dst_v, + dst_ystride, dst_uvstride); + } +#endif +} #endif /* diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h index 
b104f83..5e45b68 100644 --- a/vp9/common/vp9_reconinter.h +++ b/vp9/common/vp9_reconinter.h @@ -54,6 +54,13 @@ extern void vp9_build_inter32x32_predictors_sb(MACROBLOCKD *x, uint8_t *dst_v, int dst_ystride, int dst_uvstride); + +extern void vp9_build_inter64x64_predictors_sb(MACROBLOCKD *x, + uint8_t *dst_y, + uint8_t *dst_u, + uint8_t *dst_v, + int dst_ystride, + int dst_uvstride); #endif extern void vp9_build_inter_predictors_mb(MACROBLOCKD *xd); diff --git a/vp9/common/vp9_reconintra.c b/vp9/common/vp9_reconintra.c index b893df1..3fec98a 100644 --- a/vp9/common/vp9_reconintra.c +++ b/vp9/common/vp9_reconintra.c @@ -254,7 +254,7 @@ void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride, int up_available, int left_available) { uint8_t *yabove_row = src - src_stride; - uint8_t yleft_col[32]; + uint8_t yleft_col[64]; uint8_t ytop_left = yabove_row[-1]; int r, c, i; @@ -271,15 +271,19 @@ void vp9_build_intra_predictors_internal(uint8_t *src, int src_stride, int average = 0; int log2_bsize_minus_1; - assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32); + assert(bsize == 4 || bsize == 8 || bsize == 16 || bsize == 32 || + bsize == 64); if (bsize == 4) { log2_bsize_minus_1 = 1; } else if (bsize == 8) { log2_bsize_minus_1 = 2; } else if (bsize == 16) { log2_bsize_minus_1 = 3; - } else /* bsize == 32 */ { + } else if (bsize == 32) { log2_bsize_minus_1 = 4; + } else { + assert(bsize == 64); + log2_bsize_minus_1 = 5; } if (up_available || left_available) { @@ -517,16 +521,17 @@ static void combine_interintra(MB_PREDICTION_MODE mode, 71, 70, 70, 70, 69, 69, 69, 68, 68, 68, 68, 68, 67, 67, 67, 67, }; - int size_scale = (size == 32 ? 1 : + int size_scale = (size >= 32 ? 1 : size == 16 ? 2 : size == 8 ? 4 : 8); + int size_shift = size == 64 ? 
1 : 0; int i, j; switch (mode) { case V_PRED: for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = weights1d[i * size_scale]; + int scale = weights1d[i * size_scale >> size_shift]; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -539,7 +544,7 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = weights1d[j * size_scale]; + int scale = weights1d[j * size_scale >> size_shift]; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -553,8 +558,9 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = (weights2d[i * size_scale * 32 + j * size_scale] + - weights1d[i * size_scale]) >> 1; + int scale = (weights2d[(i * size_scale * 32 + + j * size_scale) >> size_shift] + + weights1d[i * size_scale >> size_shift]) >> 1; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -568,8 +574,9 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = (weights2d[i * size_scale * 32 + j * size_scale] + - weights1d[j * size_scale]) >> 1; + int scale = (weights2d[(i * size_scale * 32 + + j * size_scale) >> size_shift] + + weights1d[j * size_scale >> size_shift]) >> 1; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -582,7 +589,8 @@ static void combine_interintra(MB_PREDICTION_MODE mode, for (i = 0; i < size; ++i) { for (j = 0; j < size; ++j) { int k = i * interstride + j; - int scale = weights2d[i * size_scale * 32 + j * size_scale]; + int scale = weights2d[(i * size_scale * 32 + + j * size_scale) >> size_shift]; interpred[k] = ((scale_max - scale) * interpred[k] + scale * intrapred[i * intrastride + j] + scale_round) @@ -695,6 +703,47 @@ void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, vp9_build_interintra_32x32_predictors_sby(xd, ypred, ystride); vp9_build_interintra_32x32_predictors_sbuv(xd, upred, vpred, uvstride); } + +void vp9_build_interintra_64x64_predictors_sby(MACROBLOCKD *xd, + uint8_t *ypred, + int ystride) { + uint8_t intrapredictor[4096]; + const int mode = xd->mode_info_context->mbmi.interintra_mode; + vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, + intrapredictor, 64, mode, 64, + xd->up_available, xd->left_available); + combine_interintra(xd->mode_info_context->mbmi.interintra_mode, + ypred, ystride, intrapredictor, 64, 64); +} + +void vp9_build_interintra_64x64_predictors_sbuv(MACROBLOCKD *xd, + uint8_t *upred, + uint8_t *vpred, + int uvstride) { + uint8_t uintrapredictor[1024]; + uint8_t vintrapredictor[1024]; + const int mode = xd->mode_info_context->mbmi.interintra_uv_mode; + vp9_build_intra_predictors_internal(xd->dst.u_buffer, xd->dst.uv_stride, + uintrapredictor, 32, mode, 32, + xd->up_available, xd->left_available); + vp9_build_intra_predictors_internal(xd->dst.v_buffer, xd->dst.uv_stride, + vintrapredictor, 32, mode, 32, + xd->up_available, xd->left_available); + combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, + upred, uvstride, uintrapredictor, 32, 32); + combine_interintra(xd->mode_info_context->mbmi.interintra_uv_mode, + vpred, uvstride, 
vintrapredictor, 32, 32); +} + +void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride) { + vp9_build_interintra_64x64_predictors_sby(xd, ypred, ystride); + vp9_build_interintra_64x64_predictors_sbuv(xd, upred, vpred, uvstride); +} #endif #endif @@ -719,6 +768,13 @@ void vp9_build_intra_predictors_sby_s(MACROBLOCKD *xd) { xd->mode_info_context->mbmi.mode, 32, xd->up_available, xd->left_available); } + +void vp9_build_intra_predictors_sb64y_s(MACROBLOCKD *xd) { + vp9_build_intra_predictors_internal(xd->dst.y_buffer, xd->dst.y_stride, + xd->dst.y_buffer, xd->dst.y_stride, + xd->mode_info_context->mbmi.mode, 64, + xd->up_available, xd->left_available); +} #endif #if CONFIG_COMP_INTRA_PRED @@ -778,6 +834,13 @@ void vp9_build_intra_predictors_sbuv_s(MACROBLOCKD *xd) { xd->mode_info_context->mbmi.uv_mode, 16); } + +void vp9_build_intra_predictors_sb64uv_s(MACROBLOCKD *xd) { + vp9_build_intra_predictors_mbuv_internal(xd, xd->dst.u_buffer, + xd->dst.v_buffer, xd->dst.uv_stride, + xd->mode_info_context->mbmi.uv_mode, + 32); +} #endif #if CONFIG_COMP_INTRA_PRED diff --git a/vp9/common/vp9_reconintra.h b/vp9/common/vp9_reconintra.h index f3016dd..7bdcb4e 100644 --- a/vp9/common/vp9_reconintra.h +++ b/vp9/common/vp9_reconintra.h @@ -41,6 +41,12 @@ extern void vp9_build_interintra_32x32_predictors_sb(MACROBLOCKD *xd, uint8_t *vpred, int ystride, int uvstride); +extern void vp9_build_interintra_64x64_predictors_sb(MACROBLOCKD *xd, + uint8_t *ypred, + uint8_t *upred, + uint8_t *vpred, + int ystride, + int uvstride); #endif // CONFIG_SUPERBLOCKS #endif // VP9_COMMON_VP9_RECONINTRA_H_ diff --git a/vp9/common/vp9_reconintra4x4.c b/vp9/common/vp9_reconintra4x4.c index d61a515..c41b55b 100644 --- a/vp9/common/vp9_reconintra4x4.c +++ b/vp9/common/vp9_reconintra4x4.c @@ -434,12 +434,9 @@ void vp9_comp_intra4x4_predict_c(BLOCKD *x, * to the right prediction have filled in pixels to use. */ void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) { - int extend_edge = (xd->mb_to_right_edge == 0 && xd->mb_index < 2); + int extend_edge = xd->mb_to_right_edge == 0 && xd->mb_index < 2; uint8_t *above_right = *(xd->block[0].base_dst) + xd->block[0].dst - xd->block[0].dst_stride + 16; - uint32_t *src_ptr = (uint32_t *) - (above_right - (xd->mb_index == 3 ? 
16 * xd->block[0].dst_stride : 0)); - uint32_t *dst_ptr0 = (uint32_t *)above_right; uint32_t *dst_ptr1 = (uint32_t *)(above_right + 4 * xd->block[0].dst_stride); @@ -448,6 +445,17 @@ void vp9_intra_prediction_down_copy(MACROBLOCKD *xd) { uint32_t *dst_ptr3 = (uint32_t *)(above_right + 12 * xd->block[0].dst_stride); + uint32_t *src_ptr = (uint32_t *) above_right; + + if ((xd->sb_index >= 2 && xd->mb_to_right_edge == 0) || + (xd->sb_index == 3 && xd->mb_index & 1)) + src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 32 * + xd->block[0].dst_stride); + if (xd->mb_index == 3 || + (xd->mb_to_right_edge == 0 && xd->mb_index == 2)) + src_ptr = (uint32_t *) (((uint8_t *) src_ptr) - 16 * + xd->block[0].dst_stride); + if (extend_edge) { *src_ptr = ((uint8_t *) src_ptr)[-1] * 0x01010101U; } diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 9cf7121..f02ee02 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -418,6 +418,9 @@ if [ "$CONFIG_VP9_ENCODER" = "yes" ]; then prototype unsigned int vp9_variance32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance32x32 +prototype unsigned int vp9_variance64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance64x64 + prototype unsigned int vp9_variance16x16 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance16x16 mmx sse2 vp9_variance16x16_sse2=vp9_variance16x16_wmt @@ -443,6 +446,9 @@ specialize vp9_variance4x4 mmx sse2 vp9_variance4x4_sse2=vp9_variance4x4_wmt vp9_variance4x4_mmx=vp9_variance4x4_mmx +prototype unsigned int vp9_sub_pixel_variance64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" +specialize vp9_sub_pixel_variance64x64 + prototype unsigned int vp9_sub_pixel_variance32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" specialize vp9_sub_pixel_variance32x32 @@ -467,6 +473,9 @@ prototype unsigned int vp9_sub_pixel_variance4x4 "const uint8_t *src_ptr, int so specialize vp9_sub_pixel_variance4x4 sse2 mmx vp9_sub_pixel_variance4x4_sse2=vp9_sub_pixel_variance4x4_wmt +prototype unsigned int vp9_sad64x64 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" +specialize vp9_sad64x64 + prototype unsigned int vp9_sad32x32 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad" specialize vp9_sad32x32 @@ -502,6 +511,15 @@ prototype unsigned int vp9_variance_halfpixvar16x16_hv "const uint8_t *src_ptr, specialize vp9_variance_halfpixvar16x16_hv mmx sse2 vp9_variance_halfpixvar16x16_hv_sse2=vp9_variance_halfpixvar16x16_hv_wmt +prototype unsigned int vp9_variance_halfpixvar64x64_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance_halfpixvar64x64_h + +prototype unsigned int vp9_variance_halfpixvar64x64_v "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance_halfpixvar64x64_v + +prototype unsigned int vp9_variance_halfpixvar64x64_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" +specialize vp9_variance_halfpixvar64x64_hv + 
prototype unsigned int vp9_variance_halfpixvar32x32_h "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance_halfpixvar32x32_h @@ -511,6 +529,9 @@ specialize vp9_variance_halfpixvar32x32_v prototype unsigned int vp9_variance_halfpixvar32x32_hv "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse" specialize vp9_variance_halfpixvar32x32_hv +prototype void vp9_sad64x64x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp9_sad64x64x3 + prototype void vp9_sad32x32x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x3 @@ -529,6 +550,9 @@ specialize vp9_sad8x8x3 sse3 prototype void vp9_sad4x4x3 "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array" specialize vp9_sad4x4x3 sse3 +prototype void vp9_sad64x64x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" +specialize vp9_sad64x64x8 + prototype void vp9_sad32x32x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" specialize vp9_sad32x32x8 @@ -547,6 +571,9 @@ specialize vp9_sad8x8x8 sse4 prototype void vp9_sad4x4x8 "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint16_t *sad_array" specialize vp9_sad4x4x8 sse4 +prototype void vp9_sad64x64x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" +specialize vp9_sad64x64x4d + prototype void vp9_sad32x32x4d "const uint8_t *src_ptr, int src_stride, const uint8_t **ref_ptr, int ref_stride, unsigned int *sad_array" specialize vp9_sad32x32x4d @@ -583,6 +610,9 @@ prototype unsigned int vp9_mse16x16 "const uint8_t *src_ptr, int source_stride, specialize vp9_mse16x16 mmx sse2 vp9_mse16x16_sse2=vp9_mse16x16_wmt +prototype unsigned int vp9_sub_pixel_mse64x64 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" +specialize vp9_sub_pixel_mse64x64 + prototype unsigned int vp9_sub_pixel_mse32x32 "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int Refstride, unsigned int *sse" specialize vp9_sub_pixel_mse32x32 diff --git a/vp9/decoder/vp9_decodemv.c b/vp9/decoder/vp9_decodemv.c index cbd3fb9..bbe2e95 100644 --- a/vp9/decoder/vp9_decodemv.c +++ b/vp9/decoder/vp9_decodemv.c @@ -14,7 +14,7 @@ #include "vp9/common/vp9_entropymode.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/common/vp9_findnearmv.h" - +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_entropy.h" @@ -122,7 +122,24 @@ static void kfread_modes(VP9D_COMP *pbi, m->mbmi.segment_id = 0; if (pbi->mb.update_mb_segmentation_map) { read_mb_segid(bc, &m->mbmi, &pbi->mb); - pbi->common.last_frame_seg_map[map_index] = m->mbmi.segment_id; +#if CONFIG_SUPERBLOCKS + if (m->mbmi.sb_type) { + const int nmbs = 1 << m->mbmi.sb_type; + const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); + const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + cm->last_frame_seg_map[map_index + x + y * cm->mb_cols] = + m->mbmi.segment_id; + } + } + } else +#endif + { + 
cm->last_frame_seg_map[map_index] = m->mbmi.segment_id; + } } m->mbmi.mb_skip_coeff = 0; @@ -145,7 +162,7 @@ static void kfread_modes(VP9D_COMP *pbi, } #if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { + if (m->mbmi.sb_type) { y_mode = (MB_PREDICTION_MODE) read_kf_sb_ymode(bc, pbi->common.sb_kf_ymode_prob[pbi->common.kf_ymode_probs_index]); } else @@ -212,12 +229,12 @@ static void kfread_modes(VP9D_COMP *pbi, if (m->mbmi.txfm_size != TX_4X4 && m->mbmi.mode != I8X8_PRED) { m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[1]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.encoded_as_sb) + if (m->mbmi.txfm_size != TX_8X8 && m->mbmi.sb_type) m->mbmi.txfm_size += vp9_read(bc, cm->prob_tx[2]); #endif } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.encoded_as_sb) { + } else if (cm->txfm_mode >= ALLOW_32X32 && m->mbmi.sb_type) { m->mbmi.txfm_size = TX_32X32; #endif } else if (cm->txfm_mode >= ALLOW_16X16 && m->mbmi.mode <= TM_PRED) { @@ -638,14 +655,17 @@ static void read_mb_segment_id(VP9D_COMP *pbi, read_mb_segid(bc, mbmi, xd); } #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { - cm->last_frame_seg_map[index] = mbmi->segment_id; - if (mb_col + 1 < cm->mb_cols) - cm->last_frame_seg_map[index + 1] = mbmi->segment_id; - if (mb_row + 1 < cm->mb_rows) { - cm->last_frame_seg_map[index + cm->mb_cols] = mbmi->segment_id; - if (mb_col + 1 < cm->mb_cols) - cm->last_frame_seg_map[index + cm->mb_cols + 1] = mbmi->segment_id; + if (mbmi->sb_type) { + const int nmbs = 1 << mbmi->sb_type; + const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); + const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + cm->last_frame_seg_map[index + x + y * cm->mb_cols] = + mbmi->segment_id; + } } } else #endif @@ -654,18 +674,21 @@ static void read_mb_segment_id(VP9D_COMP *pbi, } } else { #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) { - mbmi->segment_id = cm->last_frame_seg_map[index]; - if (mb_col < cm->mb_cols - 1) - mbmi->segment_id = mbmi->segment_id && - cm->last_frame_seg_map[index + 1]; - if (mb_row < cm->mb_rows - 1) { - mbmi->segment_id = mbmi->segment_id && - cm->last_frame_seg_map[index + cm->mb_cols]; - if (mb_col < cm->mb_cols - 1) - mbmi->segment_id = mbmi->segment_id && - cm->last_frame_seg_map[index + cm->mb_cols + 1]; + if (mbmi->sb_type) { + const int nmbs = 1 << mbmi->sb_type; + const int ymbs = MIN(cm->mb_rows - mb_row, nmbs); + const int xmbs = MIN(cm->mb_cols - mb_col, nmbs); + unsigned segment_id = -1; + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + segment_id = MIN(segment_id, + cm->last_frame_seg_map[index + x + + y * cm->mb_cols]); + } } + mbmi->segment_id = segment_id; } else #endif { @@ -693,6 +716,11 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, int mb_to_right_edge; int mb_to_top_edge; int mb_to_bottom_edge; +#if CONFIG_SUPERBLOCKS + const int mb_size = 1 << mi->mbmi.sb_type; +#else + const int mb_size = 1; +#endif mb_to_top_edge = xd->mb_to_top_edge; mb_to_bottom_edge = xd->mb_to_bottom_edge; @@ -707,18 +735,8 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, xd->mb_to_left_edge = mb_to_left_edge = -((mb_col * 16) << 3); mb_to_left_edge -= LEFT_TOP_MARGIN; - -#if CONFIG_SUPERBLOCKS - if (mi->mbmi.encoded_as_sb) { - xd->mb_to_right_edge = - mb_to_right_edge = ((pbi->common.mb_cols - 2 - mb_col) * 16) << 3; - } else { -#endif - 
xd->mb_to_right_edge = - mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3; -#if CONFIG_SUPERBLOCKS - } -#endif + xd->mb_to_right_edge = + mb_to_right_edge = ((pbi->common.mb_cols - mb_size - mb_col) * 16) << 3; mb_to_right_edge += RIGHT_BOTTOM_MARGIN; // Make sure the MACROBLOCKD mode info pointer is pointed at the @@ -801,7 +819,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); } else { #if CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb) + if (mbmi->sb_type) mbmi->mode = read_sb_mv_ref(bc, mv_ref_p); else #endif @@ -1155,7 +1173,7 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode = (MB_PREDICTION_MODE) vp9_get_segdata(xd, mbmi->segment_id, SEG_LVL_MODE); #if CONFIG_SUPERBLOCKS - } else if (mbmi->encoded_as_sb) { + } else if (mbmi->sb_type) { mbmi->mode = (MB_PREDICTION_MODE) read_sb_ymode(bc, pbi->common.fc.sb_ymode_prob); pbi->common.fc.sb_ymode_counts[mbmi->mode]++; @@ -1232,12 +1250,12 @@ static void read_mb_modes_mv(VP9D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi, mbmi->mode != SPLITMV) { mbmi->txfm_size += vp9_read(bc, cm->prob_tx[1]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (mbmi->encoded_as_sb && mbmi->txfm_size != TX_8X8) + if (mbmi->sb_type && mbmi->txfm_size != TX_8X8) mbmi->txfm_size += vp9_read(bc, cm->prob_tx[2]); #endif } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - } else if (mbmi->encoded_as_sb && cm->txfm_mode >= ALLOW_32X32) { + } else if (mbmi->sb_type && cm->txfm_mode >= ALLOW_32X32) { mbmi->txfm_size = TX_32X32; #endif } else if (cm->txfm_mode >= ALLOW_16X16 && diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index af34582..d524ade 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -10,6 +10,7 @@ #include "vp9/decoder/vp9_onyxd_int.h" +#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_header.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_reconintra4x4.h" @@ -172,55 +173,69 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) { static void skip_recon_mb(VP9D_COMP *pbi, MACROBLOCKD *xd) { if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + vp9_build_intra_predictors_sb64uv_s(xd); + vp9_build_intra_predictors_sb64y_s(xd); + } else +#endif // CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { vp9_build_intra_predictors_sbuv_s(xd); vp9_build_intra_predictors_sby_s(xd); - } else { -#endif - vp9_build_intra_predictors_mbuv_s(xd); - vp9_build_intra_predictors_mby_s(xd); -#if CONFIG_SUPERBLOCKS + } else +#endif // CONFIG_SUPERBLOCKS + { + vp9_build_intra_predictors_mbuv_s(xd); + vp9_build_intra_predictors_mby_s(xd); } -#endif } else { #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { +#if CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + vp9_build_inter64x64_predictors_sb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } else +#endif // CONFIG_SUPERBLOCKS64 + if (xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } else { -#endif - 
vp9_build_1st_inter16x16_predictors_mb(xd, - xd->dst.y_buffer, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.y_stride, - xd->dst.uv_stride); - - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - vp9_build_2nd_inter16x16_predictors_mb(xd, + } else +#endif // CONFIG_SUPERBLOCKS + { + vp9_build_1st_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } -#if CONFIG_COMP_INTERINTRA_PRED - else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { - vp9_build_interintra_16x16_predictors_mb(xd, + + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + vp9_build_2nd_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); - } + } +#if CONFIG_COMP_INTERINTRA_PRED + else if (xd->mode_info_context->mbmi.second_ref_frame == INTRA_FRAME) { + vp9_build_interintra_16x16_predictors_mb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } #endif -#if CONFIG_SUPERBLOCKS } -#endif } } @@ -546,8 +561,9 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, #if CONFIG_SUPERBLOCKS static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n) { - int x_idx = n & 1, y_idx = n >> 1; + BOOL_DECODER* const bc, int n, + int maska, int shiftb) { + int x_idx = n & maska, y_idx = n >> shiftb; TX_TYPE tx_type = get_tx_type_16x16(xd, &xd->block[0]); if (tx_type != DCT_DCT) { vp9_ht_dequant_idct_add_16x16_c( @@ -571,9 +587,10 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, }; static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n) { + BOOL_DECODER* const bc, int n, + int maska, int shiftb) { + int x_idx = n & maska, y_idx = n >> shiftb; BLOCKD *b = &xd->block[24]; - int x_idx = n & 1, y_idx = n >> 1; TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -632,9 +649,10 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, }; static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, - BOOL_DECODER* const bc, int n) { + BOOL_DECODER* const bc, int n, + int maska, int shiftb) { + int x_idx = n & maska, y_idx = n >> shiftb; BLOCKD *b = &xd->block[24]; - int x_idx = n & 1, y_idx = n >> 1; TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[0]); if (tx_type != DCT_DCT) { int i; @@ -687,16 +705,148 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->dst.uv_stride, xd->eobs + 16, xd); }; -static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, - int mb_row, unsigned int mb_col, - BOOL_DECODER* const bc) { +#if CONFIG_SUPERBLOCKS64 +static void decode_superblock64(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, unsigned int mb_col, + BOOL_DECODER* const bc) { int i, n, eobtotal; TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; VP9_COMMON *const pc = &pbi->common; MODE_INFO *orig_mi = xd->mode_info_context; const int mis = pc->mode_info_stride; - assert(xd->mode_info_context->mbmi.encoded_as_sb); + assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB64X64); + + if (pbi->common.frame_type != KEY_FRAME) + vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); + + // re-initialize macroblock dequantizer before detokenization + if (xd->segmentation_enabled) + mb_init_dequantizer(pbi, xd); + + if (xd->mode_info_context->mbmi.mb_skip_coeff) { + int n; + + vp9_reset_mb_tokens_context(xd); + for (n = 1; n <= 3; n++) { + if (mb_col < 
pc->mb_cols - n) + xd->above_context += n; + if (mb_row < pc->mb_rows - n) + xd->left_context += n; + vp9_reset_mb_tokens_context(xd); + if (mb_col < pc->mb_cols - n) + xd->above_context -= n; + if (mb_row < pc->mb_rows - n) + xd->left_context -= n; + } + + /* Special case: Force the loopfilter to skip when eobtotal and + * mb_skip_coeff are zero. + */ + skip_recon_mb(pbi, xd); + return; + } + + /* do prediction */ + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + vp9_build_intra_predictors_sb64y_s(xd); + vp9_build_intra_predictors_sb64uv_s(xd); + } else { + vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } + + /* dequantization and idct */ +#if CONFIG_TX32X32 + if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { + for (n = 0; n < 4; n++) { + const int x_idx = n & 1, y_idx = n >> 1; + + if (mb_col + x_idx * 2 >= pc->mb_cols || + mb_row + y_idx * 2 >= pc->mb_rows) + continue; + + xd->left_context = pc->left_context + (y_idx << 1); + xd->above_context = pc->above_context + mb_col + (x_idx << 1); + xd->mode_info_context = orig_mi + x_idx * 2 + y_idx * 2 * mis; + eobtotal = vp9_decode_sb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[1].mbmi.mb_skip_coeff = 1; + if (mb_row + 1 < pc->mb_rows) { + xd->mode_info_context[mis].mbmi.mb_skip_coeff = 1; + if (mb_col + 1 < pc->mb_cols) + xd->mode_info_context[mis + 1].mbmi.mb_skip_coeff = 1; + } + } else { + vp9_dequant_idct_add_32x32(xd->sb_coeff_data.qcoeff, xd->block[0].dequant, + xd->dst.y_buffer + x_idx * 32 + + xd->dst.y_stride * y_idx * 32, + xd->dst.y_buffer + x_idx * 32 + + xd->dst.y_stride * y_idx * 32, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[0]); + vp9_dequant_idct_add_uv_block_16x16_c(xd->sb_coeff_data.qcoeff + 1024, + xd->block[16].dequant, + xd->dst.u_buffer + x_idx * 16 + + xd->dst.uv_stride * y_idx * 16, + xd->dst.v_buffer + x_idx * 16 + + xd->dst.uv_stride * y_idx * 16, + xd->dst.uv_stride, xd->eobs + 16); + } + } + } else { +#endif + for (n = 0; n < 16; n++) { + int x_idx = n & 3, y_idx = n >> 2; + + if (mb_col + x_idx >= pc->mb_cols || mb_row + y_idx >= pc->mb_rows) + continue; + + xd->above_context = pc->above_context + mb_col + x_idx; + xd->left_context = pc->left_context + y_idx; + xd->mode_info_context = orig_mi + x_idx + y_idx * mis; + for (i = 0; i < 25; i++) { + xd->block[i].eob = 0; + xd->eobs[i] = 0; + } + + eobtotal = vp9_decode_mb_tokens(pbi, xd, bc); + if (eobtotal == 0) { // skip loopfilter + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + continue; + } + + if (tx_size == TX_16X16) { + decode_16x16_sb(pbi, xd, bc, n, 3, 2); + } else if (tx_size == TX_8X8) { + decode_8x8_sb(pbi, xd, bc, n, 3, 2); + } else { + decode_4x4_sb(pbi, xd, bc, n, 3, 2); + } + } +#if CONFIG_TX32X32 + } +#endif + + xd->above_context = pc->above_context + mb_col; + xd->left_context = pc->left_context; + xd->mode_info_context = orig_mi; +} +#endif // CONFIG_SUPERBLOCKS64 + +static void decode_superblock32(VP9D_COMP *pbi, MACROBLOCKD *xd, + int mb_row, unsigned int mb_col, + BOOL_DECODER* const bc) { + int i, n, eobtotal; + TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; + VP9_COMMON *const pc = &pbi->common; + MODE_INFO *orig_mi = xd->mode_info_context; + const int mis = pc->mode_info_stride; + + assert(xd->mode_info_context->mbmi.sb_type == BLOCK_SIZE_SB32X32); if (pbi->common.frame_type != KEY_FRAME) 
vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, pc); @@ -767,7 +917,7 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, xd->above_context = pc->above_context + mb_col + x_idx; - xd->left_context = pc->left_context + y_idx; + xd->left_context = pc->left_context + y_idx + (mb_row & 2); xd->mode_info_context = orig_mi + x_idx + y_idx * mis; for (i = 0; i < 25; i++) { xd->block[i].eob = 0; @@ -781,16 +931,16 @@ static void decode_superblock(VP9D_COMP *pbi, MACROBLOCKD *xd, } if (tx_size == TX_16X16) { - decode_16x16_sb(pbi, xd, bc, n); + decode_16x16_sb(pbi, xd, bc, n, 1, 1); } else if (tx_size == TX_8X8) { - decode_8x8_sb(pbi, xd, bc, n); + decode_8x8_sb(pbi, xd, bc, n, 1, 1); } else { - decode_4x4_sb(pbi, xd, bc, n); + decode_4x4_sb(pbi, xd, bc, n, 1, 1); } } xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context; + xd->left_context = pc->left_context + (mb_row & 2); xd->mode_info_context = orig_mi; #if CONFIG_TX32X32 } @@ -807,7 +957,7 @@ static void decode_macroblock(VP9D_COMP *pbi, MACROBLOCKD *xd, int tx_size; #if CONFIG_SUPERBLOCKS - assert(!xd->mode_info_context->mbmi.encoded_as_sb); + assert(!xd->mode_info_context->mbmi.sb_type); #endif // re-initialize macroblock dequantizer before detokenization @@ -930,190 +1080,186 @@ static int get_delta_q(vp9_reader *bc, int prev, int *q_update) { FILE *vpxlog = 0; #endif -/* Decode a row of Superblocks (2x2 region of MBs) */ -static void -decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, int mbrow, MACROBLOCKD *xd, - BOOL_DECODER* const bc) { - int i; - int sb_col; - int mb_row, mb_col; - int recon_yoffset, recon_uvoffset; - int ref_fb_idx = pc->lst_fb_idx; - int dst_fb_idx = pc->new_fb_idx; - int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride; - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - int sb_cols = (pc->mb_cols + 1) >> 1; +static void set_offsets(VP9D_COMP *pbi, int block_size, + int mb_row, int mb_col) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + const int mis = cm->mode_info_stride; + const int idx = mis * mb_row + mb_col; + const int dst_fb_idx = cm->new_fb_idx; + const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride; + const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride; + const int recon_yoffset = mb_row * 16 * recon_y_stride + 16 * mb_col; + const int recon_uvoffset = mb_row * 8 * recon_uv_stride + 8 * mb_col; + + xd->mode_info_context = cm->mi + idx; +#if CONFIG_SUPERBLOCKS + xd->mode_info_context->mbmi.sb_type = block_size >> 5; +#endif + xd->prev_mode_info_context = cm->prev_mi + idx; + xd->above_context = cm->above_context + mb_col; + xd->left_context = cm->left_context + (mb_row & 3); - // For a SB there are 2 left contexts, each pertaining to a MB row within - vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); + /* Distance of Mb to the various image edges. 
+ * These are specified to 8th pel as they are always compared to + * values that are in 1/8th pel units + */ + block_size >>= 4; // in mb units + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; + xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; + + xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); + + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; +} - mb_row = mbrow; - mb_col = 0; +static void set_refs(VP9D_COMP *pbi, int block_size, + int mb_row, int mb_col) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + MODE_INFO *mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; + + if (mbmi->ref_frame > INTRA_FRAME) { + int ref_fb_idx, ref_yoffset, ref_uvoffset, ref_y_stride, ref_uv_stride; + + /* Select the appropriate reference frame for this MB */ + if (mbmi->ref_frame == LAST_FRAME) + ref_fb_idx = cm->lst_fb_idx; + else if (mbmi->ref_frame == GOLDEN_FRAME) + ref_fb_idx = cm->gld_fb_idx; + else + ref_fb_idx = cm->alt_fb_idx; - for (sb_col = 0; sb_col < sb_cols; sb_col++) { - MODE_INFO *mi = xd->mode_info_context; + ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + ref_yoffset = mb_row * 16 * ref_y_stride + 16 * mb_col; + xd->pre.y_buffer = cm->yv12_fb[ref_fb_idx].y_buffer + ref_yoffset; + ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; + ref_uvoffset = mb_row * 8 * ref_uv_stride + 8 * mb_col; + xd->pre.u_buffer = cm->yv12_fb[ref_fb_idx].u_buffer + ref_uvoffset; + xd->pre.v_buffer = cm->yv12_fb[ref_fb_idx].v_buffer + ref_uvoffset; -#if CONFIG_SUPERBLOCKS - mi->mbmi.encoded_as_sb = vp9_read(bc, pc->sb_coded); -#endif + /* propagate errors from reference frames */ + xd->corrupted |= cm->yv12_fb[ref_fb_idx].corrupted; - // Process the 4 MBs within the SB in the order: - // top-left, top-right, bottom-left, bottom-right - for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * xd->mode_info_stride + dx; - - xd->mb_index = i; - - mi = xd->mode_info_context; - if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) { - // MB lies outside frame, skip on to next - mb_row += dy; - mb_col += dx; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - continue; - } -#if CONFIG_SUPERBLOCKS - if (i) - mi->mbmi.encoded_as_sb = 0; -#endif + if (mbmi->second_ref_frame > INTRA_FRAME) { + int second_ref_fb_idx; - // Set above context pointer - xd->above_context = pc->above_context + mb_col; - xd->left_context = pc->left_context + (i >> 1); + /* Select the appropriate reference frame for this MB */ + if (mbmi->second_ref_frame == LAST_FRAME) + second_ref_fb_idx = cm->lst_fb_idx; + else if (mbmi->second_ref_frame == GOLDEN_FRAME) + second_ref_fb_idx = cm->gld_fb_idx; + else + second_ref_fb_idx = cm->alt_fb_idx; + + xd->second_pre.y_buffer = + cm->yv12_fb[second_ref_fb_idx].y_buffer + ref_yoffset; + xd->second_pre.u_buffer = + cm->yv12_fb[second_ref_fb_idx].u_buffer + ref_uvoffset; + xd->second_pre.v_buffer = + cm->yv12_fb[second_ref_fb_idx].v_buffer + ref_uvoffset; + + /* propagate errors from reference frames */ + xd->corrupted |= cm->yv12_fb[second_ref_fb_idx].corrupted; + } + } - /* Distance of Mb to the various image edges. 
- * These are specified to 8th pel as they are always compared to - * values that are in 1/8th pel units - */ - xd->mb_to_top_edge = -((mb_row * 16)) << 3; - xd->mb_to_left_edge = -((mb_col * 16) << 3); -#if CONFIG_SUPERBLOCKS - if (mi->mbmi.encoded_as_sb) { - xd->mb_to_bottom_edge = ((pc->mb_rows - 2 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((pc->mb_cols - 2 - mb_col) * 16) << 3; - } else { -#endif - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; #if CONFIG_SUPERBLOCKS + if (mbmi->sb_type) { + const int n_mbs = 1 << mbmi->sb_type; + const int y_mbs = MIN(n_mbs, cm->mb_rows - mb_row); + const int x_mbs = MIN(n_mbs, cm->mb_cols - mb_col); + const int mis = cm->mode_info_stride; + int x, y; + + for (y = 0; y < y_mbs; y++) { + for (x = !y; x < x_mbs; x++) { + mi[y * mis + x] = *mi; } + } + } #endif -#ifdef DEC_DEBUG - dec_debug = (pbi->common.current_video_frame == 46 && - mb_row == 5 && mb_col == 2); - if (dec_debug) -#if CONFIG_SUPERBLOCKS - printf("Enter Debug %d %d sb %d\n", mb_row, mb_col, - mi->mbmi.encoded_as_sb); -#else - printf("Enter Debug %d %d\n", mb_row, mb_col); -#endif -#endif - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - +} - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); +/* Decode a row of Superblocks (2x2 region of MBs) */ +static void decode_sb_row(VP9D_COMP *pbi, VP9_COMMON *pc, + int mb_row, MACROBLOCKD *xd, + BOOL_DECODER* const bc) { + int mb_col; - xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + // For a SB there are 2 left contexts, each pertaining to a MB row within + vpx_memset(pc->left_context, 0, sizeof(pc->left_context)); + for (mb_col = 0; mb_col < pc->mb_cols; mb_col += 4) { +#if CONFIG_SUPERBLOCKS64 && CONFIG_SUPERBLOCKS + if (vp9_read(bc, pc->sb64_coded)) { + set_offsets(pbi, 64, mb_row, mb_col); vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, bc); + set_refs(pbi, 64, mb_row, mb_col); + decode_superblock64(pbi, xd, mb_row, mb_col, bc); + xd->corrupted |= bool_error(bc); + } else +#endif // CONFIG_SUPERBLOCKS64 + { + int j; - update_blockd_bmi(xd); -#ifdef DEC_DEBUG - if (dec_debug) - printf("Hello\n"); -#endif - - /* Select the appropriate reference frame for this MB */ - if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) - ref_fb_idx = pc->lst_fb_idx; - else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) - ref_fb_idx = pc->gld_fb_idx; - else - ref_fb_idx = pc->alt_fb_idx; - - xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; - xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; - xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + for (j = 0; j < 4; j++) { + const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2; - if (xd->mode_info_context->mbmi.second_ref_frame > 0) { - int second_ref_fb_idx; - - /* Select the appropriate reference frame for this MB */ - if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) - second_ref_fb_idx = pc->lst_fb_idx; - else if (xd->mode_info_context->mbmi.second_ref_frame == - GOLDEN_FRAME) - second_ref_fb_idx = pc->gld_fb_idx; - else - second_ref_fb_idx = pc->alt_fb_idx; - - xd->second_pre.y_buffer = - pc->yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; - xd->second_pre.u_buffer = 
- pc->yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset; - xd->second_pre.v_buffer = - pc->yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset; - } + if (mb_row + y_idx_sb >= pc->mb_rows || + mb_col + x_idx_sb >= pc->mb_cols) { + // MB lies outside frame, skip on to next + continue; + } - if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { - /* propagate errors from reference frames */ - xd->corrupted |= pc->yv12_fb[ref_fb_idx].corrupted; - } + xd->sb_index = j; #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - if (mb_col < pc->mb_cols - 1) - mi[1] = mi[0]; - if (mb_row < pc->mb_rows - 1) { - mi[pc->mode_info_stride] = mi[0]; - if (mb_col < pc->mb_cols - 1) - mi[pc->mode_info_stride + 1] = mi[0]; - } - } - if (xd->mode_info_context->mbmi.encoded_as_sb) { - decode_superblock(pbi, xd, mb_row, mb_col, bc); - } else { -#endif - vp9_intra_prediction_down_copy(xd); - decode_macroblock(pbi, xd, mb_row, mb_col, bc); -#if CONFIG_SUPERBLOCKS - } -#endif + if (vp9_read(bc, pc->sb32_coded)) { + set_offsets(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb); + vp9_decode_mb_mode_mv(pbi, + xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc); + set_refs(pbi, 32, mb_row + y_idx_sb, mb_col + x_idx_sb); + decode_superblock32(pbi, + xd, mb_row + y_idx_sb, mb_col + x_idx_sb, bc); + xd->corrupted |= bool_error(bc); + } else +#endif // CONFIG_SUPERBLOCKS + { + int i; + + // Process the 4 MBs within the SB in the order: + // top-left, top-right, bottom-left, bottom-right + for (i = 0; i < 4; i++) { + const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1); + + if (mb_row + y_idx >= pc->mb_rows || + mb_col + x_idx >= pc->mb_cols) { + // MB lies outside frame, skip on to next + continue; + } - /* check if the boolean decoder has suffered an error */ - xd->corrupted |= bool_error(bc); + set_offsets(pbi, 16, mb_row + y_idx, mb_col + x_idx); + xd->mb_index = i; + vp9_decode_mb_mode_mv(pbi, xd, mb_row + y_idx, mb_col + x_idx, bc); + update_blockd_bmi(xd); + set_refs(pbi, 16, mb_row + y_idx, mb_col + x_idx); + vp9_intra_prediction_down_copy(xd); + decode_macroblock(pbi, xd, mb_row, mb_col, bc); -#if CONFIG_SUPERBLOCKS - if (mi->mbmi.encoded_as_sb) { - assert(!i); - mb_col += 2; - xd->mode_info_context += 2; - xd->prev_mode_info_context += 2; - break; + /* check if the boolean decoder has suffered an error */ + xd->corrupted |= bool_error(bc); + } + } } -#endif - - // skip to next MB - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - mb_row += dy; - mb_col += dx; } } - - /* skip prediction column */ - xd->mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride; - xd->prev_mode_info_context += 1 - (pc->mb_cols & 0x1) + xd->mode_info_stride; } static unsigned int read_partition_size(const unsigned char *cx_size) { @@ -1462,7 +1608,10 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { } #if CONFIG_SUPERBLOCKS - pc->sb_coded = vp9_read_literal(&header_bc, 8); +#if CONFIG_SUPERBLOCKS64 + pc->sb64_coded = vp9_read_literal(&header_bc, 8); +#endif + pc->sb32_coded = vp9_read_literal(&header_bc, 8); #endif /* Read the loop filter level and type */ @@ -1727,12 +1876,8 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) { vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); - // Resset the macroblock mode info context to the start of the list - xd->mode_info_context = pc->mi; - xd->prev_mode_info_context = pc->prev_mi; - /* Decode a row of superblocks */ - for 
(mb_row = 0; mb_row < pc->mb_rows; mb_row += 2) { + for (mb_row = 0; mb_row < pc->mb_rows; mb_row += 4) { decode_sb_row(pbi, pc, mb_row, xd, &residual_bc); } corrupt_tokens |= xd->corrupted; diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 956c16c..a8fdc66 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -562,19 +562,7 @@ static void write_mb_segid(vp9_writer *bc, const MB_MODE_INFO *mi, const MACROBLOCKD *xd) { // Encode the MB segment id. int seg_id = mi->segment_id; -#if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - if (xd->mb_to_right_edge >= 0) - seg_id = seg_id && xd->mode_info_context[1].mbmi.segment_id; - if (xd->mb_to_bottom_edge >= 0) { - seg_id = seg_id && - xd->mode_info_context[xd->mode_info_stride].mbmi.segment_id; - if (xd->mb_to_right_edge >= 0) - seg_id = seg_id && - xd->mode_info_context[xd->mode_info_stride + 1].mbmi.segment_id; - } - } -#endif + if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { switch (seg_id) { case 0: @@ -703,443 +691,364 @@ static void update_ref_probs(VP9_COMP *const cpi) { vp9_compute_mod_refprobs(cm); } -static void pack_inter_mode_mvs(VP9_COMP *const cpi, vp9_writer *const bc) { +static void pack_inter_mode_mvs(VP9_COMP *cpi, MODE_INFO *m, + vp9_writer *bc, + int mb_rows_left, int mb_cols_left) { VP9_COMMON *const pc = &cpi->common; const nmv_context *nmvc = &pc->fc.nmvc; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - MODE_INFO *m; - MODE_INFO *prev_m; - TOKENEXTRA *tok = cpi->tok; - TOKENEXTRA *tok_end = tok + cpi->tok_count; - + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; const int mis = pc->mode_info_stride; - int mb_row, mb_col; - int row, col; - - // Values used in prediction model coding - vp9_prob pred_prob; - unsigned char prediction_flag; - - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - - cpi->mb.partition_info = cpi->mb.pi; - - mb_row = 0; - for (row = 0; row < pc->mb_rows; row += 2) { - m = pc->mi + row * mis; - prev_m = pc->prev_mi + row * mis; - - mb_col = 0; - for (col = 0; col < pc->mb_cols; col += 2) { - int i; - - // Process the 4 MBs in the order: - // top-left, top-right, bottom-left, bottom-right + MB_MODE_INFO *const mi = &m->mbmi; + const MV_REFERENCE_FRAME rf = mi->ref_frame; + const MB_PREDICTION_MODE mode = mi->mode; + const int segment_id = mi->segment_id; #if CONFIG_SUPERBLOCKS - vp9_write(bc, m->mbmi.encoded_as_sb, pc->sb_coded); -#endif - for (i = 0; i < 4; i++) { - MB_MODE_INFO *mi; - MV_REFERENCE_FRAME rf; - MV_REFERENCE_FRAME sec_ref_frame; - MB_PREDICTION_MODE mode; - int segment_id, skip_coeff; - - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * mis + dx; - - if ((mb_row >= pc->mb_rows) || (mb_col >= pc->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - m += offset_extended; - prev_m += offset_extended; - cpi->mb.partition_info += offset_extended; - continue; - } + const int mb_size = 1 << mi->sb_type; +#else + const int mb_size = 1; +#endif + int skip_coeff; - mi = &m->mbmi; - rf = mi->ref_frame; - sec_ref_frame = mi->second_ref_frame; - mode = mi->mode; - segment_id = mi->segment_id; + int mb_row = pc->mb_rows - mb_rows_left; + int mb_col = pc->mb_cols - mb_cols_left; + xd->prev_mode_info_context = pc->prev_mi + (m - pc->mi); + x->partition_info = x->pi + (m - pc->mi); - // Distance of Mb to the various image edges. 
- // These specified to 8th pel as they are always compared to MV - // values that are in 1/8th pel units - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_top_edge = -((mb_row * 16)) << 3; + // Distance of Mb to the various image edges. + // These specified to 8th pel as they are always compared to MV + // values that are in 1/8th pel units + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_right_edge = ((pc->mb_cols - mb_size - mb_col) * 16) << 3; + xd->mb_to_bottom_edge = ((pc->mb_rows - mb_size - mb_row) * 16) << 3; -#if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - xd->mb_to_right_edge = ((pc->mb_cols - 2 - mb_col) * 16) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 2 - mb_row) * 16) << 3; - } else { -#endif - xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; -#if CONFIG_SUPERBLOCKS - } +#ifdef ENTROPY_STATS + active_section = 9; #endif - // Make sure the MacroBlockD mode info pointer is set correctly - xd->mode_info_context = m; - xd->prev_mode_info_context = prev_m; + if (cpi->mb.e_mbd.update_mb_segmentation_map) { + // Is temporal coding of the segment map enabled + if (pc->temporal_update) { + unsigned char prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID); + vp9_prob pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID); -#ifdef ENTROPY_STATS - active_section = 9; -#endif - if (cpi->mb.e_mbd.update_mb_segmentation_map) { - // Is temporal coding of the segment map enabled - if (pc->temporal_update) { - prediction_flag = vp9_get_pred_flag(xd, PRED_SEG_ID); - pred_prob = vp9_get_pred_prob(pc, xd, PRED_SEG_ID); + // Code the segment id prediction flag for this mb + vp9_write(bc, prediction_flag, pred_prob); - // Code the segment id prediction flag for this mb - vp9_write(bc, prediction_flag, pred_prob); + // If the mb segment id wasn't predicted code explicitly + if (!prediction_flag) + write_mb_segid(bc, mi, &cpi->mb.e_mbd); + } else { + // Normal unpredicted coding + write_mb_segid(bc, mi, &cpi->mb.e_mbd); + } + } - // If the mb segment id wasn't predicted code explicitly - if (!prediction_flag) - write_mb_segid(bc, mi, &cpi->mb.e_mbd); - } else { - // Normal unpredicted coding - write_mb_segid(bc, mi, &cpi->mb.e_mbd); - } - } + if (!pc->mb_no_coeff_skip) { + skip_coeff = 0; + } else if (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) { + skip_coeff = 1; + } else { + const int nmbs = mb_size; + const int xmbs = MIN(nmbs, mb_cols_left); + const int ymbs = MIN(nmbs, mb_rows_left); + int x, y; + + skip_coeff = 1; + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff; + } + } - skip_coeff = 1; - if (pc->mb_no_coeff_skip && - (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { - skip_coeff = mi->mb_skip_coeff; -#if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - skip_coeff &= m[1].mbmi.mb_skip_coeff; - skip_coeff &= m[mis].mbmi.mb_skip_coeff; - skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff; - } -#endif - vp9_write(bc, skip_coeff, - vp9_get_pred_prob(pc, xd, PRED_MBSKIP)); - } + vp9_write(bc, skip_coeff, + vp9_get_pred_prob(pc, xd, PRED_MBSKIP)); + } - // Encode the reference frame. 
- if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) - || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) { - encode_ref_frame(bc, pc, xd, segment_id, rf); - } else { - assert(rf == INTRA_FRAME); - } + // Encode the reference frame. + if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE) + || vp9_get_segdata(xd, segment_id, SEG_LVL_MODE) >= NEARESTMV) { + encode_ref_frame(bc, pc, xd, segment_id, rf); + } else { + assert(rf == INTRA_FRAME); + } - if (rf == INTRA_FRAME) { + if (rf == INTRA_FRAME) { #ifdef ENTROPY_STATS - active_section = 6; + active_section = 6; #endif - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { + if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { #if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) - write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob); - else + if (m->mbmi.sb_type) + write_sb_ymode(bc, mode, pc->fc.sb_ymode_prob); + else #endif - write_ymode(bc, mode, pc->fc.ymode_prob); - } - if (mode == B_PRED) { - int j = 0; + write_ymode(bc, mode, pc->fc.ymode_prob); + } + if (mode == B_PRED) { + int j = 0; #if CONFIG_COMP_INTRA_PRED - int uses_second = - m->bmi[0].as_mode.second != - (B_PREDICTION_MODE)(B_DC_PRED - 1); - vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB); + int uses_second = + m->bmi[0].as_mode.second != + (B_PREDICTION_MODE)(B_DC_PRED - 1); + vp9_write(bc, uses_second, DEFAULT_COMP_INTRA_PROB); #endif - do { + do { #if CONFIG_COMP_INTRA_PRED - B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second; + B_PREDICTION_MODE mode2 = m->bmi[j].as_mode.second; #endif - write_bmode(bc, m->bmi[j].as_mode.first, - pc->fc.bmode_prob); + write_bmode(bc, m->bmi[j].as_mode.first, + pc->fc.bmode_prob); #if CONFIG_COMP_INTRA_PRED - if (uses_second) { - write_bmode(bc, mode2, pc->fc.bmode_prob); - } + if (uses_second) { + write_bmode(bc, mode2, pc->fc.bmode_prob); + } #endif - } while (++j < 16); - } - if (mode == I8X8_PRED) { - write_i8x8_mode(bc, m->bmi[0].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[2].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[8].as_mode.first, - pc->fc.i8x8_mode_prob); - write_i8x8_mode(bc, m->bmi[10].as_mode.first, - pc->fc.i8x8_mode_prob); - } else { - write_uv_mode(bc, mi->uv_mode, - pc->fc.uv_mode_prob[mode]); - } - } else { - vp9_prob mv_ref_p [VP9_MVREFS - 1]; - - vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); + } while (++j < 16); + } + if (mode == I8X8_PRED) { + write_i8x8_mode(bc, m->bmi[0].as_mode.first, + pc->fc.i8x8_mode_prob); + write_i8x8_mode(bc, m->bmi[2].as_mode.first, + pc->fc.i8x8_mode_prob); + write_i8x8_mode(bc, m->bmi[8].as_mode.first, + pc->fc.i8x8_mode_prob); + write_i8x8_mode(bc, m->bmi[10].as_mode.first, + pc->fc.i8x8_mode_prob); + } else { + write_uv_mode(bc, mi->uv_mode, + pc->fc.uv_mode_prob[mode]); + } + } else { + vp9_prob mv_ref_p[VP9_MVREFS - 1]; + vp9_mv_ref_probs(&cpi->common, mv_ref_p, mi->mb_mode_context[rf]); -// #ifdef ENTROPY_STATS + // #ifdef ENTROPY_STATS #ifdef ENTROPY_STATS - accum_mv_refs(mode, ct); - active_section = 3; + accum_mv_refs(mode, ct); + active_section = 3; #endif - // Is the segment coding of mode enabled - if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { + // Is the segment coding of mode enabled + if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_MODE)) { #if CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb) { - write_sb_mv_ref(bc, mode, mv_ref_p); - } else + if (mi->sb_type) { + write_sb_mv_ref(bc, mode, mv_ref_p); + } else #endif - { - write_mv_ref(bc, mode, mv_ref_p); - } 
- vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); - } + { + write_mv_ref(bc, mode, mv_ref_p); + } + vp9_accum_mv_refs(&cpi->common, mode, mi->mb_mode_context[rf]); + } #if CONFIG_PRED_FILTER - // Is the prediction filter enabled - if (mode >= NEARESTMV && mode < SPLITMV) { - if (cpi->common.pred_filter_mode == 2) - vp9_write(bc, mi->pred_filter_enabled, - pc->prob_pred_filter_off); - else - assert(mi->pred_filter_enabled == - cpi->common.pred_filter_mode); - } + // Is the prediction filter enabled + if (mode >= NEARESTMV && mode < SPLITMV) { + if (cpi->common.pred_filter_mode == 2) + vp9_write(bc, mi->pred_filter_enabled, + pc->prob_pred_filter_off); + else + assert(mi->pred_filter_enabled == + cpi->common.pred_filter_mode); + } #endif - if (mode >= NEARESTMV && mode <= SPLITMV) - { - if (cpi->common.mcomp_filter_type == SWITCHABLE) { - write_token(bc, vp9_switchable_interp_tree, - vp9_get_pred_probs(&cpi->common, xd, - PRED_SWITCHABLE_INTERP), - vp9_switchable_interp_encodings + - vp9_switchable_interp_map[mi->interp_filter]); - } else { - assert (mi->interp_filter == - cpi->common.mcomp_filter_type); - } - } + if (mode >= NEARESTMV && mode <= SPLITMV) { + if (cpi->common.mcomp_filter_type == SWITCHABLE) { + write_token(bc, vp9_switchable_interp_tree, + vp9_get_pred_probs(&cpi->common, xd, + PRED_SWITCHABLE_INTERP), + vp9_switchable_interp_encodings + + vp9_switchable_interp_map[mi->interp_filter]); + } else { + assert(mi->interp_filter == cpi->common.mcomp_filter_type); + } + } - // does the feature use compound prediction or not - // (if not specified at the frame/segment level) - if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { - vp9_write(bc, mi->second_ref_frame > INTRA_FRAME, - vp9_get_pred_prob(pc, xd, PRED_COMP)); - } + // does the feature use compound prediction or not + // (if not specified at the frame/segment level) + if (cpi->common.comp_pred_mode == HYBRID_PREDICTION) { + vp9_write(bc, mi->second_ref_frame > INTRA_FRAME, + vp9_get_pred_prob(pc, xd, PRED_COMP)); + } #if CONFIG_COMP_INTERINTRA_PRED - if (cpi->common.use_interintra && - mode >= NEARESTMV && mode < SPLITMV && - mi->second_ref_frame <= INTRA_FRAME) { - vp9_write(bc, mi->second_ref_frame == INTRA_FRAME, - pc->fc.interintra_prob); - // if (!cpi->dummy_packing) - // printf("-- %d (%d)\n", mi->second_ref_frame == INTRA_FRAME, - // pc->fc.interintra_prob); - if (mi->second_ref_frame == INTRA_FRAME) { - // if (!cpi->dummy_packing) - // printf("** %d %d\n", mi->interintra_mode, - // mi->interintra_uv_mode); - write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob); + if (cpi->common.use_interintra && + mode >= NEARESTMV && mode < SPLITMV && + mi->second_ref_frame <= INTRA_FRAME) { + vp9_write(bc, mi->second_ref_frame == INTRA_FRAME, + pc->fc.interintra_prob); + // if (!cpi->dummy_packing) + // printf("-- %d (%d)\n", mi->second_ref_frame == INTRA_FRAME, + // pc->fc.interintra_prob); + if (mi->second_ref_frame == INTRA_FRAME) { + // if (!cpi->dummy_packing) + // printf("** %d %d\n", mi->interintra_mode, + // mi->interintra_uv_mode); + write_ymode(bc, mi->interintra_mode, pc->fc.ymode_prob); #if SEPARATE_INTERINTRA_UV - write_uv_mode(bc, mi->interintra_uv_mode, - pc->fc.uv_mode_prob[mi->interintra_mode]); + write_uv_mode(bc, mi->interintra_uv_mode, + pc->fc.uv_mode_prob[mi->interintra_mode]); #endif - } - } + } + } #endif #if CONFIG_NEW_MVREF - // if ((mode == NEWMV) || (mode == SPLITMV)) { - if (mode == NEWMV) { - // Encode the index of the choice. 
- vp9_write_mv_ref_id(bc, - xd->mb_mv_ref_probs[rf], mi->best_index); - - if (mi->second_ref_frame > 0) { - // Encode the index of the choice. - vp9_write_mv_ref_id( - bc, xd->mb_mv_ref_probs[mi->second_ref_frame], - mi->best_second_index); - } - } + // if ((mode == NEWMV) || (mode == SPLITMV)) { + if (mode == NEWMV) { + // Encode the index of the choice. + vp9_write_mv_ref_id(bc, + xd->mb_mv_ref_probs[rf], mi->best_index); + + if (mi->second_ref_frame > 0) { + // Encode the index of the choice. + vp9_write_mv_ref_id( + bc, xd->mb_mv_ref_probs[mi->second_ref_frame], + mi->best_second_index); + } + } #endif - { - switch (mode) { /* new, split require MVs */ - case NEWMV: + + switch (mode) { /* new, split require MVs */ + case NEWMV: #ifdef ENTROPY_STATS - active_section = 5; + active_section = 5; #endif - write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); + write_nmv(bc, &mi->mv[0].as_mv, &mi->best_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); - if (mi->second_ref_frame > 0) { - write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - } - break; - case SPLITMV: { - int j = 0; + if (mi->second_ref_frame > 0) { + write_nmv(bc, &mi->mv[1].as_mv, &mi->best_second_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); + } + break; + case SPLITMV: { + int j = 0; #ifdef MODE_STATS - ++count_mb_seg [mi->partitioning]; + ++count_mb_seg[mi->partitioning]; #endif - write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob); - cpi->mbsplit_count[mi->partitioning]++; - - do { - B_PREDICTION_MODE blockmode; - int_mv blockmv; - const int *const L = - vp9_mbsplits [mi->partitioning]; - int k = -1; /* first block in subset j */ - int mv_contz; - int_mv leftmv, abovemv; + write_split(bc, mi->partitioning, cpi->common.fc.mbsplit_prob); + cpi->mbsplit_count[mi->partitioning]++; - blockmode = cpi->mb.partition_info->bmi[j].mode; - blockmv = cpi->mb.partition_info->bmi[j].mv; + do { + B_PREDICTION_MODE blockmode; + int_mv blockmv; + const int *const L = vp9_mbsplits[mi->partitioning]; + int k = -1; /* first block in subset j */ + int mv_contz; + int_mv leftmv, abovemv; + + blockmode = cpi->mb.partition_info->bmi[j].mode; + blockmv = cpi->mb.partition_info->bmi[j].mv; #if CONFIG_DEBUG - while (j != L[++k]) - if (k >= 16) - assert(0); + while (j != L[++k]) + if (k >= 16) + assert(0); #else - while (j != L[++k]); + while (j != L[++k]); #endif - leftmv.as_int = left_block_mv(m, k); - abovemv.as_int = above_block_mv(m, k, mis); - mv_contz = vp9_mv_cont(&leftmv, &abovemv); + leftmv.as_int = left_block_mv(m, k); + abovemv.as_int = above_block_mv(m, k, mis); + mv_contz = vp9_mv_cont(&leftmv, &abovemv); - write_sub_mv_ref(bc, blockmode, - cpi->common.fc.sub_mv_ref_prob [mv_contz]); - cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++; - if (blockmode == NEW4X4) { + write_sub_mv_ref(bc, blockmode, + cpi->common.fc.sub_mv_ref_prob[mv_contz]); + cpi->sub_mv_ref_count[mv_contz][blockmode - LEFT4X4]++; + if (blockmode == NEW4X4) { #ifdef ENTROPY_STATS - active_section = 11; -#endif - write_nmv(bc, &blockmv.as_mv, &mi->best_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - - if (mi->second_ref_frame > 0) { - write_nmv(bc, - &cpi->mb.partition_info->bmi[j].second_mv.as_mv, - &mi->best_second_mv, - (const nmv_context*) nmvc, - xd->allow_high_precision_mv); - } - } - } while (++j < cpi->mb.partition_info->count); - } - break; - default: - break; + active_section = 
11; +#endif + write_nmv(bc, &blockmv.as_mv, &mi->best_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); + + if (mi->second_ref_frame > 0) { + write_nmv(bc, + &cpi->mb.partition_info->bmi[j].second_mv.as_mv, + &mi->best_second_mv, + (const nmv_context*) nmvc, + xd->allow_high_precision_mv); } } - } + } while (++j < cpi->mb.partition_info->count); + break; + } + default: + break; + } + } - if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || - (rf != INTRA_FRAME && !(mode == SPLITMV && - mi->partitioning == PARTITIONING_4X4))) && - pc->txfm_mode == TX_MODE_SELECT && - !((pc->mb_no_coeff_skip && skip_coeff) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { - TX_SIZE sz = mi->txfm_size; - // FIXME(rbultje) code ternary symbol once all experiments are merged - vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); - if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) { - vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); + if (((rf == INTRA_FRAME && mode <= I8X8_PRED) || + (rf != INTRA_FRAME && !(mode == SPLITMV && + mi->partitioning == PARTITIONING_4X4))) && + pc->txfm_mode == TX_MODE_SELECT && + !((pc->mb_no_coeff_skip && skip_coeff) || + (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { + TX_SIZE sz = mi->txfm_size; + // FIXME(rbultje) code ternary symbol once all experiments are merged + vp9_write(bc, sz != TX_4X4, pc->prob_tx[0]); + if (sz != TX_4X4 && mode != I8X8_PRED && mode != SPLITMV) { + vp9_write(bc, sz != TX_8X8, pc->prob_tx[1]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (mi->encoded_as_sb && sz != TX_8X8) - vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); -#endif - } - } - -#ifdef ENTROPY_STATS - active_section = 1; -#endif - assert(tok < tok_end); - pack_mb_tokens(bc, &tok, tok_end); - -#if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { - assert(!i); - mb_col += 2; - m += 2; - cpi->mb.partition_info += 2; - prev_m += 2; - break; - } -#endif - - // Next MB - mb_row += dy; - mb_col += dx; - m += offset_extended; - prev_m += offset_extended; - cpi->mb.partition_info += offset_extended; -#if CONFIG_DEBUG - assert((prev_m - cpi->common.prev_mip) == (m - cpi->common.mip)); - assert((prev_m - cpi->common.prev_mi) == (m - cpi->common.mi)); + if (mi->sb_type && sz != TX_8X8) + vp9_write(bc, sz != TX_16X16, pc->prob_tx[2]); #endif - } } - - // Next SB - mb_row += 2; - m += mis + (1 - (pc->mb_cols & 0x1)); - prev_m += mis + (1 - (pc->mb_cols & 0x1)); - cpi->mb.partition_info += mis + (1 - (pc->mb_cols & 0x1)); } } - -static void write_mb_modes_kf(const VP9_COMMON *c, - const MACROBLOCKD *xd, - const MODE_INFO *m, - int mode_info_stride, - vp9_writer *const bc) { - int ym; - int segment_id; - - ym = m->mbmi.mode; - segment_id = m->mbmi.segment_id; +static void write_mb_modes_kf(const VP9_COMP *cpi, + const MODE_INFO *m, + vp9_writer *bc, + int mb_rows_left, int mb_cols_left) { + const VP9_COMMON *const c = &cpi->common; + const MACROBLOCKD *const xd = &cpi->mb.e_mbd; + const int mis = c->mode_info_stride; + const int ym = m->mbmi.mode; + const int segment_id = m->mbmi.segment_id; + int skip_coeff; if (xd->update_mb_segmentation_map) { write_mb_segid(bc, &m->mbmi, xd); } - if (c->mb_no_coeff_skip && - (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || - (vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) != 0))) { - int skip_coeff = m->mbmi.mb_skip_coeff; + if (!c->mb_no_coeff_skip) { + skip_coeff = 0; + } else if (vp9_segfeature_active(xd, segment_id, 
SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) { + skip_coeff = 1; + } else { #if CONFIG_SUPERBLOCKS - const int mis = mode_info_stride; - if (m->mbmi.encoded_as_sb) { - skip_coeff &= m[1].mbmi.mb_skip_coeff; - skip_coeff &= m[mis].mbmi.mb_skip_coeff; - skip_coeff &= m[mis + 1].mbmi.mb_skip_coeff; - } + const int nmbs = 1 << m->mbmi.sb_type; +#else + const int nmbs = 1; #endif - vp9_write(bc, skip_coeff, - vp9_get_pred_prob(c, xd, PRED_MBSKIP)); + const int xmbs = MIN(nmbs, mb_cols_left); + const int ymbs = MIN(nmbs, mb_rows_left); + int x, y; + + skip_coeff = 1; + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + skip_coeff = skip_coeff && m[y * mis + x].mbmi.mb_skip_coeff; + } + } + + vp9_write(bc, skip_coeff, + vp9_get_pred_prob(c, xd, PRED_MBSKIP)); } #if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { + if (m->mbmi.sb_type) { sb_kfwrite_ymode(bc, ym, c->sb_kf_ymode_prob[c->kf_ymode_probs_index]); } else @@ -1150,7 +1059,6 @@ static void write_mb_modes_kf(const VP9_COMMON *c, } if (ym == B_PRED) { - const int mis = c->mode_info_stride; int i = 0; #if CONFIG_COMP_INTRA_PRED int uses_second = @@ -1195,7 +1103,7 @@ static void write_mb_modes_kf(const VP9_COMMON *c, write_uv_mode(bc, m->mbmi.uv_mode, c->kf_uv_mode_prob[ym]); if (ym <= I8X8_PRED && c->txfm_mode == TX_MODE_SELECT && - !((c->mb_no_coeff_skip && m->mbmi.mb_skip_coeff) || + !((c->mb_no_coeff_skip && skip_coeff) || (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { TX_SIZE sz = m->mbmi.txfm_size; @@ -1204,75 +1112,99 @@ static void write_mb_modes_kf(const VP9_COMMON *c, if (sz != TX_4X4 && ym <= TM_PRED) { vp9_write(bc, sz != TX_8X8, c->prob_tx[1]); #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb && sz != TX_8X8) + if (m->mbmi.sb_type && sz != TX_8X8) vp9_write(bc, sz != TX_16X16, c->prob_tx[2]); #endif } } } -static void write_kfmodes(VP9_COMP* const cpi, vp9_writer* const bc) { +static void write_modes_b(VP9_COMP *cpi, MODE_INFO *m, vp9_writer *bc, + TOKENEXTRA **tok, TOKENEXTRA *tok_end, + int mb_row, int mb_col) { + VP9_COMMON *const c = &cpi->common; + MACROBLOCKD *const xd = &cpi->mb.e_mbd; + + xd->mode_info_context = m; + if (c->frame_type == KEY_FRAME) { + write_mb_modes_kf(cpi, m, bc, + c->mb_rows - mb_row, c->mb_cols - mb_col); +#ifdef ENTROPY_STATS + active_section = 8; +#endif + } else { + pack_inter_mode_mvs(cpi, m, bc, + c->mb_rows - mb_row, c->mb_cols - mb_col); +#ifdef ENTROPY_STATS + active_section = 1; +#endif + } + + assert(*tok < tok_end); + pack_mb_tokens(bc, tok, tok_end); +} + +static void write_modes(VP9_COMP *cpi, vp9_writer* const bc) { VP9_COMMON *const c = &cpi->common; const int mis = c->mode_info_stride; - MACROBLOCKD *xd = &cpi->mb.e_mbd; - MODE_INFO *m; - int i; - int row, col; - int mb_row, mb_col; - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; + MODE_INFO *m, *m_ptr = c->mi; + int i, mb_row, mb_col; TOKENEXTRA *tok = cpi->tok; TOKENEXTRA *tok_end = tok + cpi->tok_count; - mb_row = 0; - for (row = 0; row < c->mb_rows; row += 2) { - m = c->mi + row * mis; + for (mb_row = 0; mb_row < c->mb_rows; mb_row += 4, m_ptr += 4 * mis) { + m = m_ptr; + for (mb_col = 0; mb_col < c->mb_cols; mb_col += 4, m += 4) { +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + vp9_write(bc, m->mbmi.sb_type == BLOCK_SIZE_SB64X64, c->sb64_coded); + if (m->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + write_modes_b(cpi, m, bc, &tok, tok_end, mb_row, mb_col); + } else +#endif + { + int j; 
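// [Editor's note -- illustrative sketch, not part of the patch] The
// partition signaling that write_modes() emits here and decode_sb_row()
// parses on the other side: one bit per 64x64 unit (probability
// sb64_coded); if that bit is clear, one bit per in-frame 32x32 quadrant
// (probability sb32_coded). The hypothetical helper below models the
// decode side with a generic bit-reader in place of vp9_read(), and
// ignores the skipping of out-of-frame quadrants for brevity.
typedef int (*sketch_read_bit_fn)(void *ctx);  /* returns 0 or 1 */

static void sketch_read_partition(sketch_read_bit_fn read_bit, void *ctx,
                                  int quadrant_type[4]) {
  int j;
  if (read_bit(ctx)) {            /* sb64_coded: whole unit is one SB64 */
    for (j = 0; j < 4; j++)
      quadrant_type[j] = 2;       /* BLOCK_SIZE_SB64X64 */
  } else {
    for (j = 0; j < 4; j++)       /* one sb32_coded bit per quadrant */
      quadrant_type[j] = read_bit(ctx) ? 1   /* BLOCK_SIZE_SB32X32 */
                                       : 0;  /* four 16x16 MBs */
  }
}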
- mb_col = 0; - for (col = 0; col < c->mb_cols; col += 2) { + for (j = 0; j < 4; j++) { + const int x_idx_sb = (j & 1) << 1, y_idx_sb = j & 2; #if CONFIG_SUPERBLOCKS - vp9_write(bc, m->mbmi.encoded_as_sb, c->sb_coded); -#endif - // Process the 4 MBs in the order: - // top-left, top-right, bottom-left, bottom-right - for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * mis + dx; - - if ((mb_row >= c->mb_rows) || (mb_col >= c->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - m += offset_extended; - continue; - } + MODE_INFO *sb_m = m + y_idx_sb * mis + x_idx_sb; +#endif - // Make sure the MacroBlockD mode info pointer is set correctly - xd->mode_info_context = m; + if (mb_col + x_idx_sb >= c->mb_cols || + mb_row + y_idx_sb >= c->mb_rows) + continue; - write_mb_modes_kf(c, xd, m, mis, bc); -#ifdef ENTROPY_STATS - active_section = 8; +#if CONFIG_SUPERBLOCKS + vp9_write(bc, sb_m->mbmi.sb_type, c->sb32_coded); + if (sb_m->mbmi.sb_type) { + assert(sb_m->mbmi.sb_type == BLOCK_SIZE_SB32X32); + write_modes_b(cpi, sb_m, bc, &tok, tok_end, + mb_row + y_idx_sb, mb_col + x_idx_sb); + } else #endif - assert(tok < tok_end); - pack_mb_tokens(bc, &tok, tok_end); + { + // Process the 4 MBs in the order: + // top-left, top-right, bottom-left, bottom-right + for (i = 0; i < 4; i++) { + const int x_idx = x_idx_sb + (i & 1), y_idx = y_idx_sb + (i >> 1); + MODE_INFO *mb_m = m + x_idx + y_idx * mis; + + if (mb_row + y_idx >= c->mb_rows || + mb_col + x_idx >= c->mb_cols) { + // MB lies outside frame, move on + continue; + } #if CONFIG_SUPERBLOCKS - if (m->mbmi.encoded_as_sb) { - assert(!i); - mb_col += 2; - m += 2; - break; - } + assert(mb_m->mbmi.sb_type == BLOCK_SIZE_MB16X16); #endif - // Next MB - mb_row += dy; - mb_col += dx; - m += offset_extended; + write_modes_b(cpi, mb_m, bc, &tok, tok_end, + mb_row + y_idx, mb_col + x_idx); + } + } + } } } - mb_row += 2; } } @@ -1800,13 +1732,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, } #if CONFIG_SUPERBLOCKS - { - /* sb mode probability */ - const int sb_max = (((pc->mb_rows + 1) >> 1) * ((pc->mb_cols + 1) >> 1)); - - pc->sb_coded = get_prob(sb_max - cpi->sb_count, sb_max); - vp9_write_literal(&header_bc, pc->sb_coded, 8); - } +#if CONFIG_SUPERBLOCKS64 + pc->sb64_coded = get_binary_prob(cpi->sb64_count[0], cpi->sb64_count[1]); + vp9_write_literal(&header_bc, pc->sb64_coded, 8); +#endif + pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]); + vp9_write_literal(&header_bc, pc->sb32_coded, 8); #endif { @@ -2195,12 +2126,12 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest, if (pc->frame_type == KEY_FRAME) { decide_kf_ymode_entropy(cpi); - write_kfmodes(cpi, &residual_bc); + write_modes(cpi, &residual_bc); } else { /* This is not required if the counts in cpi are consistent with the * final packing pass */ // if (!cpi->dummy_packing) vp9_zero(cpi->NMVcount); - pack_inter_mode_mvs(cpi, &residual_bc); + write_modes(cpi, &residual_bc); vp9_update_mode_context(&cpi->common); } diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index f5cfbd1..e8f6f46 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -181,10 +181,13 @@ typedef struct macroblock { // Structure to hold context for each of the 4 MBs within a SB: // when encoded as 4 independent MBs: - PICK_MODE_CONTEXT mb_context[4]; + PICK_MODE_CONTEXT mb_context[4][4]; #if CONFIG_SUPERBLOCKS // when 4 MBs share coding parameters: - PICK_MODE_CONTEXT 
sb_context[4]; + PICK_MODE_CONTEXT sb32_context[4]; +#if CONFIG_SUPERBLOCKS64 + PICK_MODE_CONTEXT sb64_context; +#endif // CONFIG_SUPERBLOCKS64 #endif void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3219e12..2192950 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -47,14 +47,17 @@ int enc_debug = 0; extern void select_interp_filter_type(VP9_COMP *cpi); -static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled, - int mb_col, int mb_row); +static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, int mb_row, int mb_col); -static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int mb_col, int mb_row); +static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, int mb_row, int mb_col); + +static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, int mb_row, int mb_col); static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x); @@ -431,37 +434,45 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x, #endif static void update_state(VP9_COMP *cpi, MACROBLOCK *x, - PICK_MODE_CONTEXT *ctx) { - int i; + PICK_MODE_CONTEXT *ctx, int block_size, + int output_enabled) { + int i, x_idx, y; MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = &ctx->mic; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; int mb_mode = mi->mbmi.mode; int mb_mode_index = ctx->best_mode_index; + const int mis = cpi->common.mode_info_stride; +#if CONFIG_SUPERBLOCKS + int mb_block_size = 1 << mi->mbmi.sb_type; +#else + int mb_block_size = 1; +#endif #if CONFIG_DEBUG assert(mb_mode < MB_MODE_COUNT); assert(mb_mode_index < MAX_MODES); assert(mi->mbmi.ref_frame < MAX_REF_FRAMES); #endif +#if CONFIG_SUPERBLOCKS + assert(mi->mbmi.sb_type == (block_size >> 5)); +#endif // Restore the coding context of the MB to that that was in place // when the mode was picked for it - vpx_memcpy(xd->mode_info_context, mi, sizeof(MODE_INFO)); -#if CONFIG_SUPERBLOCKS - if (mi->mbmi.encoded_as_sb) { - const int mis = cpi->common.mode_info_stride; - if (xd->mb_to_right_edge >= 0) - vpx_memcpy(xd->mode_info_context + 1, mi, sizeof(MODE_INFO)); - if (xd->mb_to_bottom_edge >= 0) { - vpx_memcpy(xd->mode_info_context + mis, mi, sizeof(MODE_INFO)); - if (xd->mb_to_right_edge >= 0) - vpx_memcpy(xd->mode_info_context + mis + 1, mi, sizeof(MODE_INFO)); + for (y = 0; y < mb_block_size; y++) { + for (x_idx = 0; x_idx < mb_block_size; x_idx++) { + if ((xd->mb_to_right_edge >> 7) + mb_block_size > x_idx && + (xd->mb_to_bottom_edge >> 7) + mb_block_size > y) { + MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis; + + vpx_memcpy(mi_addr, mi, sizeof(MODE_INFO)); + } } + } #if CONFIG_TX32X32 && CONFIG_SUPERBLOCKS - } else { + if (block_size == 16) { ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16]; -#endif } #endif @@ -482,6 +493,9 @@ static void update_state(VP9_COMP *cpi, MACROBLOCK *x, mbmi->mv[1].as_int = x->partition_info->bmi[15].second_mv.as_int; } + if (!output_enabled) + return; + { int segment_id = mbmi->segment_id; if (!vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) || @@ -603,6 +617,135 @@ static 
void update_state(VP9_COMP *cpi, MACROBLOCK *x, } } +static unsigned find_seg_id(uint8_t *buf, int block_size, + int start_y, int height, int start_x, int width) { + const int end_x = MIN(start_x + block_size, width); + const int end_y = MIN(start_y + block_size, height); + int x, y; + unsigned seg_id = -1; + + buf += width * start_y; + for (y = start_y; y < end_y; y++, buf += width) { + for (x = start_x; x < end_x; x++) { + seg_id = MIN(seg_id, buf[x]); + } + } + + return seg_id; +} + +static void set_offsets(VP9_COMP *cpi, + int mb_row, int mb_col, int block_size, + int *ref_yoffset, int *ref_uvoffset) { + MACROBLOCK *const x = &cpi->mb; + VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi; + const int dst_fb_idx = cm->new_fb_idx; + const int recon_y_stride = cm->yv12_fb[dst_fb_idx].y_stride; + const int recon_uv_stride = cm->yv12_fb[dst_fb_idx].uv_stride; + const int recon_yoffset = 16 * mb_row * recon_y_stride + 16 * mb_col; + const int recon_uvoffset = 8 * mb_row * recon_uv_stride + 8 * mb_col; + const int src_y_stride = x->src.y_stride; + const int src_uv_stride = x->src.uv_stride; + const int src_yoffset = 16 * mb_row * src_y_stride + 16 * mb_col; + const int src_uvoffset = 8 * mb_row * src_uv_stride + 8 * mb_col; + const int ref_fb_idx = cm->lst_fb_idx; + const int ref_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; + const int ref_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; + const int idx_map = mb_row * cm->mb_cols + mb_col; + const int idx_str = xd->mode_info_stride * mb_row + mb_col; + + // entropy context structures + xd->above_context = cm->above_context + mb_col; + xd->left_context = cm->left_context + (mb_row & 3); + + // GF active flags data structure + x->gf_active_ptr = (signed char *)&cpi->gf_active_flags[idx_map]; + + // Activity map pointer + x->mb_activity_ptr = &cpi->mb_activity_map[idx_map]; + x->active_ptr = cpi->active_map + idx_map; + + /* pointers to mode info contexts */ + x->partition_info = x->pi + idx_str; + xd->mode_info_context = cm->mi + idx_str; + mbmi = &xd->mode_info_context->mbmi; + xd->prev_mode_info_context = cm->prev_mi + idx_str; + + // Set up destination pointers + xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; + xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; + xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + + /* Set up limit values for MV components to prevent them from + * extending beyond the UMV borders assuming 16x16 block size */ + x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); + x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); + x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + + (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND)); + x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + + (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND)); + + // Set up distance of MB to edge of frame in 1/8th pel units + block_size >>= 4; // in macroblock units + assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1))); + xd->mb_to_top_edge = -((mb_row * 16) << 3); + xd->mb_to_left_edge = -((mb_col * 16) << 3); + xd->mb_to_bottom_edge = ((cm->mb_rows - block_size - mb_row) * 16) << 3; + xd->mb_to_right_edge = ((cm->mb_cols - block_size - mb_col) * 16) << 3; + + // Are edges available for intra prediction? 
+ xd->up_available = (mb_row != 0); + xd->left_available = (mb_col != 0); + + /* Reference buffer offsets */ + *ref_yoffset = (mb_row * ref_y_stride * 16) + (mb_col * 16); + *ref_uvoffset = (mb_row * ref_uv_stride * 8) + (mb_col * 8); + + /* set up source buffers */ + x->src.y_buffer = cpi->Source->y_buffer + src_yoffset; + x->src.u_buffer = cpi->Source->u_buffer + src_uvoffset; + x->src.v_buffer = cpi->Source->v_buffer + src_uvoffset; + + /* R/D setup */ + x->rddiv = cpi->RDDIV; + x->rdmult = cpi->RDMULT; + + /* segment ID */ + if (xd->segmentation_enabled) { + if (xd->update_mb_segmentation_map) { + mbmi->segment_id = find_seg_id(cpi->segmentation_map, block_size, + mb_row, cm->mb_rows, mb_col, cm->mb_cols); + } else { + mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, block_size, + mb_row, cm->mb_rows, mb_col, cm->mb_cols); + } + assert(mbmi->segment_id <= 3); + vp9_mb_init_quantizer(cpi, x); + + if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && + !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && + vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) && + vp9_check_segref(xd, 1, INTRA_FRAME) + + vp9_check_segref(xd, 1, LAST_FRAME) + + vp9_check_segref(xd, 1, GOLDEN_FRAME) + + vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) { + cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; + } else { + const int y = mb_row & ~3; + const int x = mb_col & ~3; + const int p16 = ((mb_row & 1) << 1) + (mb_col & 1); + const int p32 = ((mb_row & 2) << 2) + ((mb_col & 2) << 1); + + cpi->seg0_progress = + ((y * cm->mb_cols + x * 4 + p32 + p16) << 16) / cm->MBs; + } + } else { + mbmi->segment_id = 0; + } +} + static void pick_mb_modes(VP9_COMP *cpi, VP9_COMMON *cm, int mb_row, @@ -613,24 +756,15 @@ static void pick_mb_modes(VP9_COMP *cpi, int *totalrate, int *totaldist) { int i; - int map_index; int recon_yoffset, recon_uvoffset; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; ENTROPY_CONTEXT_PLANES left_context[2]; ENTROPY_CONTEXT_PLANES above_context[2]; ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context + mb_col; - // Offsets to move pointers from MB to MB within a SB in raster order - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - /* Function should not modify L & A contexts; save and restore on exit */ vpx_memcpy(left_context, - cm->left_context, + cm->left_context + (mb_row & 2), sizeof(left_context)); vpx_memcpy(above_context, initial_above_context_ptr, @@ -638,113 +772,36 @@ static void pick_mb_modes(VP9_COMP *cpi, /* Encode MBs in raster order within the SB */ for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_unextended = dy * cm->mb_cols + dx; - int offset_extended = dy * xd->mode_info_stride + dx; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + const int x_idx = i & 1, y_idx = i >> 1; + MB_MODE_INFO *mbmi; - // TODO Many of the index items here can be computed more efficiently! 
- - if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) { + if ((mb_row + y_idx >= cm->mb_rows) || (mb_col + x_idx >= cm->mb_cols)) { // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - - // Update pointers - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif continue; } // Index of the MB in the SB 0..3 xd->mb_index = i; - - map_index = (mb_row * cpi->common.mb_cols) + mb_col; - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - // set above context pointer - xd->above_context = cm->above_context + mb_col; - - // Restore the appropriate left context depending on which - // row in the SB the MB is situated - xd->left_context = cm->left_context + (i >> 1); - - // Set up distance of MB to edge of frame in 1/8th pel units - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; - - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 16x16 block size - x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND)); - - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16, + &recon_yoffset, &recon_uvoffset); #if !CONFIG_SUPERBLOCKS // Copy current MB to a work buffer vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); #endif - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp9_activity_masking(cpi, x); - // Is segmentation enabled - if (xd->segmentation_enabled) { - // Code to set segment id in xd->mbmi.segment_id - if (xd->update_mb_segmentation_map) - mbmi->segment_id = cpi->segmentation_map[map_index]; - else - mbmi->segment_id = cm->last_frame_seg_map[map_index]; - if (mbmi->segment_id > 3) - mbmi->segment_id = 0; - - vp9_mb_init_quantizer(cpi, x); - } else - // Set to Segment 0 by default - mbmi->segment_id = 0; - - x->active_ptr = cpi->active_map + map_index; - + mbmi = &xd->mode_info_context->mbmi; #if CONFIG_SUPERBLOCKS - xd->mode_info_context->mbmi.encoded_as_sb = 0; + mbmi->sb_type = BLOCK_SIZE_MB16X16; #endif cpi->update_context = 0; // TODO Do we need this now?? 
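// [Editor's note -- illustrative sketch, not part of the patch] The
// sb_type arithmetic that pick_mb_modes(), set_offsets() and
// update_state() all rely on: sb_type 0/1/2 corresponds to 1/2/4
// macroblocks, i.e. 16/32/64 luma pixels, per block side. Helper names
// are hypothetical.
#include <assert.h>

static int sketch_mbs_per_side(int sb_type)    { return 1 << sb_type; }
static int sketch_pixels_per_side(int sb_type) { return 16 << sb_type; }

static void sketch_sb_type_sanity(void) {
  /* Why update_state() can assert sb_type == (block_size >> 5) when
   * block_size is given in pixels (16, 32 or 64): */
  assert((16 >> 5) == 0 && (32 >> 5) == 1 && (64 >> 5) == 2);
  assert(sketch_mbs_per_side(2) == 4);      /* a SB64 spans 4 MBs per side */
  assert(sketch_pixels_per_side(1) == 32);  /* a SB32 spans 32 pixels */
}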
vp9_intra_prediction_down_copy(xd); -#ifdef ENC_DEBUG - enc_debug = (cpi->common.current_video_frame == 46 && - mb_row == 5 && mb_col == 2); -#endif // Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB if (cm->frame_type == KEY_FRAME) { @@ -758,28 +815,16 @@ static void pick_mb_modes(VP9_COMP *cpi, *totaldist += d; // Dummy encode, do not do the tokenization - encode_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 0, mb_col, mb_row); + encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0, + mb_row + y_idx, mb_col + x_idx); // Note the encoder may have changed the segment_id // Save the coding context - vpx_memcpy(&x->mb_context[i].mic, xd->mode_info_context, + vpx_memcpy(&x->mb_context[xd->sb_index][i].mic, xd->mode_info_context, sizeof(MODE_INFO)); } else { int seg_id, r, d; - if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) && - vp9_check_segref(xd, 1, INTRA_FRAME) + - vp9_check_segref(xd, 1, LAST_FRAME) + - vp9_check_segref(xd, 1, GOLDEN_FRAME) + - vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - cpi->seg0_progress = (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols + i) << 16) / cm->MBs; - } - #ifdef ENC_DEBUG if (enc_debug) printf("inter pick_mb_modes %d %d\n", mb_row, mb_col); @@ -790,8 +835,8 @@ static void pick_mb_modes(VP9_COMP *cpi, *totaldist += d; // Dummy encode, do not do the tokenization - encode_macroblock(cpi, x, tp, - recon_yoffset, recon_uvoffset, 0, mb_col, mb_row); + encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, 0, + mb_row + y_idx, mb_col + x_idx); seg_id = mbmi->segment_id; if (cpi->mb.e_mbd.segmentation_enabled && seg_id == 0) { @@ -811,28 +856,10 @@ static void pick_mb_modes(VP9_COMP *cpi, cpi->ref_pred_count[pred_context][pred_flag]++; } } - - // Next MB - mb_row += dy; - mb_col += dx; - - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif } /* Restore L & A coding context to those in place on entry */ - vpx_memcpy(cm->left_context, + vpx_memcpy(cm->left_context + (mb_row & 2), left_context, sizeof(left_context)); vpx_memcpy(initial_above_context_ptr, @@ -841,392 +868,204 @@ static void pick_mb_modes(VP9_COMP *cpi, } #if CONFIG_SUPERBLOCKS -static void pick_sb_modes (VP9_COMP *cpi, - VP9_COMMON *cm, - int mb_row, - int mb_col, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp, - int *totalrate, - int *totaldist) -{ - int map_index; +static void pick_sb_modes(VP9_COMP *cpi, + VP9_COMMON *cm, + int mb_row, + int mb_col, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, + int *totalrate, + int *totaldist) { int recon_yoffset, recon_uvoffset; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - ENTROPY_CONTEXT_PLANES left_context[2]; - ENTROPY_CONTEXT_PLANES above_context[2]; - ENTROPY_CONTEXT_PLANES *initial_above_context_ptr = cm->above_context 
- + mb_col; - - /* Function should not modify L & A contexts; save and restore on exit */ - vpx_memcpy (left_context, - cm->left_context, - sizeof(left_context)); - vpx_memcpy (above_context, - initial_above_context_ptr, - sizeof(above_context)); - - map_index = (mb_row * cpi->common.mb_cols) + mb_col; - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - /* set above context pointer */ - xd->above_context = cm->above_context + mb_col; - - /* Restore the appropriate left context depending on which - * row in the SB the MB is situated */ - xd->left_context = cm->left_context; - - // Set up distance of MB to edge of frame in 1/8th pel units - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); - xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3; - - /* Set up limit values for MV components to prevent them from - * extending beyond the UMV borders assuming 16x16 block size */ - x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND)); - - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); - - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; -#if 0 // FIXME - /* Copy current MB to a work buffer */ - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); -#endif - x->rddiv = cpi->RDDIV; - x->rdmult = cpi->RDMULT; - if(cpi->oxcf.tuning == VP8_TUNE_SSIM) + set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset); + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB32X32; + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp9_activity_masking(cpi, x); - /* Is segmentation enabled */ - if (xd->segmentation_enabled) - { - /* Code to set segment id in xd->mbmi.segment_id */ - if (xd->update_mb_segmentation_map) - xd->mode_info_context->mbmi.segment_id = - cpi->segmentation_map[map_index] && - cpi->segmentation_map[map_index + 1] && - cpi->segmentation_map[map_index + cm->mb_cols] && - cpi->segmentation_map[map_index + cm->mb_cols + 1]; - else - xd->mode_info_context->mbmi.segment_id = - cm->last_frame_seg_map[map_index] && - cm->last_frame_seg_map[map_index + 1] && - cm->last_frame_seg_map[map_index + cm->mb_cols] && - cm->last_frame_seg_map[map_index + cm->mb_cols + 1]; - if (xd->mode_info_context->mbmi.segment_id > 3) - xd->mode_info_context->mbmi.segment_id = 0; - - vp9_mb_init_quantizer(cpi, x); - } - else - /* Set to Segment 0 by default */ - xd->mode_info_context->mbmi.segment_id = 0; - - x->active_ptr = cpi->active_map + map_index; - cpi->update_context = 0; // TODO Do we need this now?? 
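// [Editor's note -- illustrative sketch, not part of the patch] How
// encode_sb_row() uses the rate/distortion pair computed here: the 32x32
// candidate is charged the sb32_coded flag set to 1, the four-MB
// alternative the same flag set to 0, and the cheaper lambda-weighted
// cost wins. SKETCH_RDCOST only approximates the encoder's RDCOST macro
// from vp9_rdopt.h.
#define SKETCH_RDCOST(rm, dm, r, d) (((128 + (r) * (rm)) >> 8) + (dm) * (d))

static int sketch_use_sb32(int rdmult, int rddiv,
                           int sb_rate, int sb_dist,    /* one 32x32 SB */
                           int mb_rate, int mb_dist) {  /* four 16x16 MBs */
  /* Nonzero when the single 32x32 partition wins the RD comparison. */
  return SKETCH_RDCOST(rdmult, rddiv, sb_rate, sb_dist) <
         SKETCH_RDCOST(rdmult, rddiv, mb_rate, mb_dist);
}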
/* Find best coding mode & reconstruct the MB so it is available * as a predictor for MBs that follow in the SB */ - if (cm->frame_type == KEY_FRAME) - { - vp9_rd_pick_intra_mode_sb(cpi, x, - totalrate, - totaldist); + if (cm->frame_type == KEY_FRAME) { + vp9_rd_pick_intra_mode_sb32(cpi, x, + totalrate, + totaldist); /* Save the coding context */ - vpx_memcpy(&x->sb_context[0].mic, xd->mode_info_context, + vpx_memcpy(&x->sb32_context[xd->sb_index].mic, xd->mode_info_context, sizeof(MODE_INFO)); } else { - if (xd->segmentation_enabled && cpi->seg0_cnt > 0 && - !vp9_segfeature_active(xd, 0, SEG_LVL_REF_FRAME) && - vp9_segfeature_active(xd, 1, SEG_LVL_REF_FRAME) && - vp9_check_segref(xd, 1, INTRA_FRAME) + - vp9_check_segref(xd, 1, LAST_FRAME) + - vp9_check_segref(xd, 1, GOLDEN_FRAME) + - vp9_check_segref(xd, 1, ALTREF_FRAME) == 1) { - cpi->seg0_progress = (cpi->seg0_idx << 16) / cpi->seg0_cnt; - } else { - cpi->seg0_progress = - (((mb_col & ~1) * 2 + (mb_row & ~1) * cm->mb_cols) << 16) / cm->MBs; - } - - vp9_rd_pick_inter_mode_sb(cpi, x, - recon_yoffset, - recon_uvoffset, - totalrate, - totaldist); + vp9_rd_pick_inter_mode_sb32(cpi, x, + recon_yoffset, + recon_uvoffset, + totalrate, + totaldist); } - - /* Restore L & A coding context to those in place on entry */ - vpx_memcpy (cm->left_context, - left_context, - sizeof(left_context)); - vpx_memcpy (initial_above_context_ptr, - above_context, - sizeof(above_context)); } -#endif -static void encode_sb(VP9_COMP *cpi, - VP9_COMMON *cm, - int mbrow, - int mbcol, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp) { - int i; - int map_index; - int mb_row, mb_col; +#if CONFIG_SUPERBLOCKS64 +static void pick_sb64_modes(VP9_COMP *cpi, + VP9_COMMON *cm, + int mb_row, + int mb_col, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, + int *totalrate, + int *totaldist) { int recon_yoffset, recon_uvoffset; - int ref_fb_idx = cm->lst_fb_idx; - int dst_fb_idx = cm->new_fb_idx; - int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; - int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; - int row_delta[4] = { 0, +1, 0, -1}; - int col_delta[4] = { +1, -1, +1, +1}; - - mb_row = mbrow; - mb_col = mbcol; - - /* Encode MBs in raster order within the SB */ - for (i = 0; i < 4; i++) { - int dy = row_delta[i]; - int dx = col_delta[i]; - int offset_extended = dy * xd->mode_info_stride + dx; - int offset_unextended = dy * cm->mb_cols + dx; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; - - if ((mb_row >= cm->mb_rows) || (mb_col >= cm->mb_cols)) { - // MB lies outside frame, move on - mb_row += dy; - mb_col += dx; - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); + set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset); + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64; + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp9_activity_masking(cpi, x); + cpi->update_context = 0; // TODO(rbultje) Do we need this now?? 
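// [Editor's note -- illustrative sketch, not part of the patch] The full
// hierarchical search that encode_sb_row() builds from pick_mb_modes(),
// pick_sb_modes() and pick_sb64_modes(): each 32x32 quadrant keeps the
// cheaper of four MB16s vs one SB32, the four winners are summed, and the
// sum then competes against coding the whole unit as one SB64. Types and
// names are hypothetical; the cost formula mirrors the sketch above.
typedef struct { long rate; long dist; } sketch_rd_t;

static long sketch_cost(sketch_rd_t c, long rdmult, long rddiv) {
  return ((128 + c.rate * rdmult) >> 8) + rddiv * c.dist;
}

/* quad[i] holds the already-decided best cost of 32x32 quadrant i; sb64
 * the cost of coding the unit as a single 64x64 block. Returns 1 when the
 * 64x64 coding should be used (is_sb[0] == 2 in the code below). */
static int sketch_use_sb64(const sketch_rd_t quad[4], sketch_rd_t sb64,
                           long rdmult, long rddiv) {
  sketch_rd_t sum = { 0, 0 };
  int i;
  for (i = 0; i < 4; i++) {
    sum.rate += quad[i].rate;
    sum.dist += quad[i].dist;
  }
  return sketch_cost(sb64, rdmult, rddiv) < sketch_cost(sum, rdmult, rddiv);
}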
- x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; + /* Find best coding mode & reconstruct the MB so it is available + * as a predictor for MBs that follow in the SB */ + if (cm->frame_type == KEY_FRAME) { + vp9_rd_pick_intra_mode_sb64(cpi, x, + totalrate, + totaldist); -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif - continue; - } + /* Save the coding context */ + vpx_memcpy(&x->sb64_context.mic, xd->mode_info_context, + sizeof(MODE_INFO)); + } else { + vp9_rd_pick_inter_mode_sb64(cpi, x, + recon_yoffset, + recon_uvoffset, + totalrate, + totaldist); + } +} +#endif // CONFIG_SUPERBLOCKS64 +#endif // CONFIG_SUPERBLOCKS - xd->mb_index = i; +static void update_stats(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + MODE_INFO *mi = xd->mode_info_context; + MB_MODE_INFO *const mbmi = &mi->mbmi; - // Restore MB state to that when it was picked -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - update_state(cpi, x, &x->sb_context[i]); - cpi->sb_count++; - } else + if (cm->frame_type == KEY_FRAME) { +#ifdef MODE_STATS + y_modes[mbmi->mode]++; #endif - update_state(cpi, x, &x->mb_context[i]); - - map_index = (mb_row * cpi->common.mb_cols) + mb_col; - x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; - - // reset above block coeffs - xd->above_context = cm->above_context + mb_col; - xd->left_context = cm->left_context + (i >> 1); - - // Set up distance of MB to edge of the frame in 1/8th pel units - // Set up limit values for MV components to prevent them from - // extending beyond the UMV borders assuming 32x32 block size - x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND); - - xd->mb_to_top_edge = -((mb_row * 16) << 3); - xd->mb_to_left_edge = -((mb_col * 16) << 3); + } else { + int segment_id, seg_ref_active; -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP9BORDERINPIXELS - 32 - VP9_INTERP_EXTEND)); - - xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3; - } else { -#endif - x->mv_row_max = ((cm->mb_rows - mb_row) * 16 + - (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND)); - x->mv_col_max = ((cm->mb_cols - mb_col) * 16 + - (VP9BORDERINPIXELS - 16 - VP9_INTERP_EXTEND)); + if (mbmi->ref_frame) { + int pred_context = vp9_get_pred_context(cm, xd, PRED_COMP); - xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; - xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; -#if CONFIG_SUPERBLOCKS + if (mbmi->second_ref_frame <= INTRA_FRAME) + cpi->single_pred_count[pred_context]++; + else + cpi->comp_pred_count[pred_context]++; } -#endif - - xd->up_available = (mb_row != 0); - xd->left_available = (mb_col != 0); - recon_yoffset = (mb_row * recon_y_stride * 16) + (mb_col * 16); - recon_uvoffset = (mb_row * recon_uv_stride * 8) + (mb_col * 8); +#ifdef MODE_STATS + inter_y_modes[mbmi->mode]++; - xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; - xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + 
recon_uvoffset; - xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; + if (mbmi->mode == SPLITMV) { + int b; -#if !CONFIG_SUPERBLOCKS - // Copy current MB to a work buffer - vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); + for (b = 0; b < x->partition_info->count; b++) { + inter_b_modes[x->partition_info->bmi[b].mode]++; + } + } #endif - if (cpi->oxcf.tuning == VP8_TUNE_SSIM) - vp9_activity_masking(cpi, x); - - // Is segmentation enabled - if (xd->segmentation_enabled) { - vp9_mb_init_quantizer(cpi, x); + // If we have just a single reference frame coded for a segment then + // exclude from the reference frame counts used to work out + // probabilities. NOTE: At the moment we dont support custom trees + // for the reference frame coding for each segment but this is a + // possible future action. + segment_id = mbmi->segment_id; + seg_ref_active = vp9_segfeature_active(xd, segment_id, + SEG_LVL_REF_FRAME); + if (!seg_ref_active || + ((vp9_check_segref(xd, segment_id, INTRA_FRAME) + + vp9_check_segref(xd, segment_id, LAST_FRAME) + + vp9_check_segref(xd, segment_id, GOLDEN_FRAME) + + vp9_check_segref(xd, segment_id, ALTREF_FRAME)) > 1)) { + cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++; } + // Count of last ref frame 0,0 usage + if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) + cpi->inter_zz_count++; + } +} - x->active_ptr = cpi->active_map + map_index; - - cpi->update_context = 0; +static void encode_sb(VP9_COMP *cpi, + VP9_COMMON *cm, + int mb_row, + int mb_col, + int output_enabled, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, int is_sb) { + int recon_yoffset, recon_uvoffset; #if CONFIG_SUPERBLOCKS - if (!xd->mode_info_context->mbmi.encoded_as_sb) -#endif - vp9_intra_prediction_down_copy(xd); + cpi->sb32_count[is_sb]++; + if (is_sb) { + set_offsets(cpi, mb_row, mb_col, 32, &recon_yoffset, &recon_uvoffset); + update_state(cpi, x, &x->sb32_context[xd->sb_index], 32, output_enabled); - if (cm->frame_type == KEY_FRAME) { -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) - encode_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, - mb_col, mb_row); - else -#endif - encode_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1, - mb_col, mb_row); - // Note the encoder may have changed the segment_id + encode_superblock32(cpi, tp, recon_yoffset, recon_uvoffset, + output_enabled, mb_row, mb_col); + if (output_enabled) + update_stats(cpi); -#ifdef MODE_STATS - y_modes[mbmi->mode]++; + if (output_enabled) { + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + if (mb_row < cm->mb_rows) + cpi->tplist[mb_row].stop = *tp; + } + } else #endif - } else { - unsigned char *segment_id; - int seg_ref_active; - - if (xd->mode_info_context->mbmi.ref_frame) { - unsigned char pred_context; + { + int i; - pred_context = vp9_get_pred_context(cm, xd, PRED_COMP); + for (i = 0; i < 4; i++) { + const int x_idx = i & 1, y_idx = i >> 1; - if (xd->mode_info_context->mbmi.second_ref_frame <= INTRA_FRAME) - cpi->single_pred_count[pred_context]++; - else - cpi->comp_pred_count[pred_context]++; + if ((mb_row + y_idx >= cm->mb_rows) || (mb_col + x_idx >= cm->mb_cols)) { + // MB lies outside frame, move on + continue; } -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) - encode_superblock(cpi, x, tp, recon_yoffset, recon_uvoffset, - mb_col, mb_row); - else -#endif - encode_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset, 1, - mb_col, mb_row); - // Note the encoder may have changed the segment_id - -#ifdef MODE_STATS - 
inter_y_modes[mbmi->mode]++; - - if (mbmi->mode == SPLITMV) { - int b; - - for (b = 0; b < x->partition_info->count; b++) { - inter_b_modes[x->partition_info->bmi[b].mode]++; - } - } + set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16, + &recon_yoffset, &recon_uvoffset); + xd->mb_index = i; + update_state(cpi, x, &x->mb_context[xd->sb_index][i], 16, output_enabled); +#if !CONFIG_SUPERBLOCKS + // Copy current MB to a work buffer + vp9_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); #endif - // If we have just a single reference frame coded for a segment then - // exclude from the reference frame counts used to work out - // probabilities. NOTE: At the moment we dont support custom trees - // for the reference frame coding for each segment but this is a - // possible future action. - segment_id = &mbmi->segment_id; - seg_ref_active = vp9_segfeature_active(xd, *segment_id, - SEG_LVL_REF_FRAME); - if (!seg_ref_active || - ((vp9_check_segref(xd, *segment_id, INTRA_FRAME) + - vp9_check_segref(xd, *segment_id, LAST_FRAME) + - vp9_check_segref(xd, *segment_id, GOLDEN_FRAME) + - vp9_check_segref(xd, *segment_id, ALTREF_FRAME)) > 1)) { - { - cpi->count_mb_ref_frame_usage[mbmi->ref_frame]++; - } - } - - // Count of last ref frame 0,0 usage - if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) - cpi->inter_zz_count++; - } + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) + vp9_activity_masking(cpi, x); -#if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { - x->src.y_buffer += 32; - x->src.u_buffer += 16; - x->src.v_buffer += 16; + vp9_intra_prediction_down_copy(xd); - x->gf_active_ptr += 2; - x->partition_info += 2; - xd->mode_info_context += 2; - xd->prev_mode_info_context += 2; + encode_macroblock(cpi, tp, recon_yoffset, recon_uvoffset, + output_enabled, mb_row + y_idx, mb_col + x_idx); + if (output_enabled) + update_stats(cpi); - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp; - break; + if (output_enabled) { + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + if (mb_row + y_idx < cm->mb_rows) + cpi->tplist[mb_row + y_idx].stop = *tp; + } } -#endif - - // Next MB - mb_row += dy; - mb_col += dx; - - x->src.y_buffer += 16 * (dx + dy * x->src.y_stride); - x->src.u_buffer += 8 * (dx + dy * x->src.uv_stride); - x->src.v_buffer += 8 * (dx + dy * x->src.uv_stride); - - x->gf_active_ptr += offset_unextended; - x->partition_info += offset_extended; - xd->mode_info_context += offset_extended; - xd->prev_mode_info_context += offset_extended; - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif - (*tp)->Token = EOSB_TOKEN; - (*tp)++; - if (mb_row < cm->mb_rows) cpi->tplist[mb_row].stop = *tp; } // debug output @@ -1237,17 +1076,57 @@ static void encode_sb(VP9_COMP *cpi, fprintf(statsfile, "\n"); fclose(statsfile); } -#endif +#endif +} + +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 +static void encode_sb64(VP9_COMP *cpi, + VP9_COMMON *cm, + int mb_row, + int mb_col, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, int is_sb[4]) { + cpi->sb64_count[is_sb[0] == 2]++; + if (is_sb[0] == 2) { + int recon_yoffset, recon_uvoffset; + + set_offsets(cpi, mb_row, mb_col, 64, &recon_yoffset, &recon_uvoffset); + update_state(cpi, x, &x->sb64_context, 64, 1); + encode_superblock64(cpi, tp, recon_yoffset, recon_uvoffset, + 1, mb_row, mb_col); + update_stats(cpi); + + (*tp)->Token = EOSB_TOKEN; + (*tp)++; + if (mb_row < cm->mb_rows) + 
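/* [Illustrative note, not part of the patch.] Together, encode_sb64() and
 * encode_sb() implement a two-level quadtree: a 64x64 region is either coded
 * as one superblock (is_sb[0] == 2) or split into four 32x32 quadrants, and
 * each 32x32 quadrant (is_sb[i] == 1) is either one superblock or four 16x16
 * MBs. Schematically (hypothetical wrapper around the patch's real helpers):
 *
 *   if (is_sb[0] == 2)            encode_superblock64(...);  // whole 64x64
 *   else for (i = 0; i < 4; i++)  // quadrants in raster order
 *     encode_sb(..., is_sb[i]);   // superblock32 or 4x encode_macroblock
 */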
cpi->tplist[mb_row].stop = *tp; + } else { + int i; + + for (i = 0; i < 4; i++) { + const int x_idx = i & 1, y_idx = i >> 1; + + if (mb_row + y_idx * 2 >= cm->mb_rows || + mb_col + x_idx * 2 >= cm->mb_cols) { + // MB lies outside frame, move on + continue; + } + xd->sb_index = i; + encode_sb(cpi, cm, mb_row + 2 * y_idx, mb_col + 2 * x_idx, 1, x, xd, tp, + is_sb[i]); + } + } } +#endif // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 -static -void encode_sb_row(VP9_COMP *cpi, - VP9_COMMON *cm, - int mb_row, - MACROBLOCK *x, - MACROBLOCKD *xd, - TOKENEXTRA **tp, - int *totalrate) { +static void encode_sb_row(VP9_COMP *cpi, + VP9_COMMON *cm, + int mb_row, + MACROBLOCK *x, + MACROBLOCKD *xd, + TOKENEXTRA **tp, + int *totalrate) { int mb_col; int mb_cols = cm->mb_cols; @@ -1255,105 +1134,103 @@ void encode_sb_row(VP9_COMP *cpi, vpx_memset(cm->left_context, 0, sizeof(cm->left_context)); // Code each SB in the row - for (mb_col = 0; mb_col < mb_cols; mb_col += 2) { - int mb_rate = 0, mb_dist = 0; + for (mb_col = 0; mb_col < mb_cols; mb_col += 4) { + int i; + int sb32_rate = 0, sb32_dist = 0; + int is_sb[4]; +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + int sb64_rate = INT_MAX, sb64_dist; + ENTROPY_CONTEXT_PLANES l[4], a[4]; + TOKENEXTRA *tp_orig = *tp; + + memcpy(&a, cm->above_context + mb_col, sizeof(a)); + memcpy(&l, cm->left_context, sizeof(l)); +#endif // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + for (i = 0; i < 4; i++) { + const int x_idx = (i & 1) << 1, y_idx = i & 2; + int mb_rate = 0, mb_dist = 0; #if CONFIG_SUPERBLOCKS - int sb_rate = INT_MAX, sb_dist; + int sb_rate = INT_MAX, sb_dist; #endif -#if CONFIG_DEBUG - MODE_INFO *mic = xd->mode_info_context; - PARTITION_INFO *pi = x->partition_info; - signed char *gfa = x->gf_active_ptr; - uint8_t *yb = x->src.y_buffer; - uint8_t *ub = x->src.u_buffer; - uint8_t *vb = x->src.v_buffer; + if (mb_row + y_idx >= cm->mb_rows || mb_col + x_idx >= cm->mb_cols) + continue; + + xd->sb_index = i; + + pick_mb_modes(cpi, cm, mb_row + y_idx, mb_col + x_idx, + x, xd, tp, &mb_rate, &mb_dist); +#if CONFIG_SUPERBLOCKS + mb_rate += vp9_cost_bit(cm->sb32_coded, 0); #endif #if CONFIG_SUPERBLOCKS - // Pick modes assuming the SB is coded as 4 independent MBs - xd->mode_info_context->mbmi.encoded_as_sb = 0; + if (!((( mb_cols & 1) && mb_col + x_idx == mb_cols - 1) || + ((cm->mb_rows & 1) && mb_row + y_idx == cm->mb_rows - 1))) { + /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ + pick_sb_modes(cpi, cm, mb_row + y_idx, mb_col + x_idx, + x, xd, tp, &sb_rate, &sb_dist); + sb_rate += vp9_cost_bit(cm->sb32_coded, 1); + } + + /* Decide whether to encode as a SB or 4xMBs */ + if (sb_rate < INT_MAX && + RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) < + RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) { + is_sb[i] = 1; + sb32_rate += sb_rate; + sb32_dist += sb_dist; + } else #endif - pick_mb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &mb_rate, &mb_dist); + { #if CONFIG_SUPERBLOCKS - mb_rate += vp9_cost_bit(cm->sb_coded, 0); + is_sb[i] = 0; #endif + sb32_rate += mb_rate; + sb32_dist += mb_dist; + } - x->src.y_buffer -= 32; - x->src.u_buffer -= 16; - x->src.v_buffer -= 16; - - x->gf_active_ptr -= 2; - x->partition_info -= 2; - xd->mode_info_context -= 2; - xd->prev_mode_info_context -= 2; + /* Encode SB using best computed mode(s) */ + // FIXME(rbultje): there really shouldn't be any need to encode_mb/sb + // for each level that we go up, we can just keep tokens and recon + // pixels of the lower level; also, inverting SB/MB order (big->small + // 
instead of small->big) means we can use the big-block cost as a threshold for the small blocks, which + may enable breakouts if RD is not good enough (i.e. faster) + encode_sb(cpi, cm, mb_row + y_idx, mb_col + x_idx, + !(CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64), + x, xd, tp, is_sb[i]); + } -#if CONFIG_DEBUG - assert(x->gf_active_ptr == gfa); - assert(x->partition_info == pi); - assert(xd->mode_info_context == mic); - assert(x->src.y_buffer == yb); - assert(x->src.u_buffer == ub); - assert(x->src.v_buffer == vb); -#endif +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + memcpy(cm->above_context + mb_col, &a, sizeof(a)); + memcpy(cm->left_context, &l, sizeof(l)); + sb32_rate += vp9_cost_bit(cm->sb64_coded, 0); -#if CONFIG_SUPERBLOCKS - if (!((( mb_cols & 1) && mb_col == mb_cols - 1) || - ((cm->mb_rows & 1) && mb_row == cm->mb_rows - 1))) { - /* Pick a mode assuming that it applies to all 4 of the MBs in the SB */ - xd->mode_info_context->mbmi.encoded_as_sb = 1; - pick_sb_modes(cpi, cm, mb_row, mb_col, x, xd, tp, &sb_rate, &sb_dist); - sb_rate += vp9_cost_bit(cm->sb_coded, 1); + if (!((( mb_cols & 3) && mb_col + 3 >= mb_cols) || + ((cm->mb_rows & 3) && mb_row + 3 >= cm->mb_rows))) { + pick_sb64_modes(cpi, cm, mb_row, mb_col, + x, xd, tp, &sb64_rate, &sb64_dist); + sb64_rate += vp9_cost_bit(cm->sb64_coded, 1); } /* Decide whether to encode as a SB or 4xMBs */ - if (sb_rate < INT_MAX && - RDCOST(x->rdmult, x->rddiv, sb_rate, sb_dist) < - RDCOST(x->rdmult, x->rddiv, mb_rate, mb_dist)) { - xd->mode_info_context->mbmi.encoded_as_sb = 1; - xd->mode_info_context[1].mbmi.encoded_as_sb = 1; - xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 1; - xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 1; - *totalrate += sb_rate; + if (sb64_rate < INT_MAX && + RDCOST(x->rdmult, x->rddiv, sb64_rate, sb64_dist) < + RDCOST(x->rdmult, x->rddiv, sb32_rate, sb32_dist)) { + is_sb[0] = 2; + *totalrate += sb64_rate; } else #endif { -#if CONFIG_SUPERBLOCKS - xd->mode_info_context->mbmi.encoded_as_sb = 0; - if (cm->mb_cols - 1 > mb_col) - xd->mode_info_context[1].mbmi.encoded_as_sb = 0; - if (cm->mb_rows - 1 > mb_row) { - xd->mode_info_context[cm->mode_info_stride].mbmi.encoded_as_sb = 0; - if (cm->mb_cols - 1 > mb_col) - xd->mode_info_context[1 + cm->mode_info_stride].mbmi.encoded_as_sb = 0; - } -#endif - *totalrate += mb_rate; + *totalrate += sb32_rate; } - /* Encode SB using best computed mode(s) */ - encode_sb(cpi, cm, mb_row, mb_col, x, xd, tp); - -#if CONFIG_DEBUG - assert(x->gf_active_ptr == gfa + 2); - assert(x->partition_info == pi + 2); - assert(xd->mode_info_context == mic + 2); - assert(x->src.y_buffer == yb + 32); - assert(x->src.u_buffer == ub + 16); - assert(x->src.v_buffer == vb + 16); -#endif +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + assert(tp_orig == *tp); + encode_sb64(cpi, cm, mb_row, mb_col, x, xd, tp, is_sb); + assert(tp_orig < *tp); +#endif // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 } - - // this is to account for the border - x->gf_active_ptr += mb_cols - (mb_cols & 0x1); - x->partition_info += xd->mode_info_stride + 1 - (mb_cols & 0x1); - xd->mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1); - xd->prev_mode_info_context += xd->mode_info_stride + 1 - (mb_cols & 0x1); - -#if CONFIG_DEBUG - assert((xd->prev_mode_info_context - cpi->common.prev_mip) == - (xd->mode_info_context - cpi->common.mip)); -#endif } static void init_encode_frame_mb_context(VP9_COMP *cpi) { @@ -1361,22 +1238,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { VP9_COMMON *const cm
= &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - // GF active flags data structure - x->gf_active_ptr = (signed char *)cpi->gf_active_flags; - - // Activity map pointer - x->mb_activity_ptr = cpi->mb_activity_map; - x->act_zbin_adj = 0; cpi->seg0_idx = 0; vpx_memset(cpi->ref_pred_count, 0, sizeof(cpi->ref_pred_count)); - x->partition_info = x->pi; - - xd->mode_info_context = cm->mi; xd->mode_info_stride = cm->mode_info_stride; - xd->prev_mode_info_context = cm->prev_mi; - xd->frame_type = cm->frame_type; xd->frames_since_golden = cm->frames_since_golden; @@ -1387,7 +1253,7 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_init_mbmode_probs(cm); // Copy data over into macro block data structures. - x->src = * cpi->Source; + x->src = *cpi->Source; xd->pre = cm->yv12_fb[cm->lst_fb_idx]; xd->dst = cm->yv12_fb[cm->new_fb_idx]; @@ -1413,8 +1279,11 @@ static void init_encode_frame_mb_context(VP9_COMP *cpi) { vp9_zero(cpi->common.fc.mv_ref_ct) #if CONFIG_SUPERBLOCKS vp9_zero(cpi->sb_ymode_count) - cpi->sb_count = 0; -#endif + vp9_zero(cpi->sb32_count); +#if CONFIG_SUPERBLOCKS64 + vp9_zero(cpi->sb64_count); +#endif // CONFIG_SUPERBLOCKS64 +#endif // CONFIG_SUPERBLOCKS #if CONFIG_COMP_INTERINTRA_PRED vp9_zero(cpi->interintra_count); vp9_zero(cpi->interintra_select_count); @@ -1527,15 +1396,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { { // For each row of SBs in the frame - for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) { - int offset = (cm->mb_cols + 1) & ~0x1; - + for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4) { encode_sb_row(cpi, cm, mb_row, x, xd, &tp, &totalrate); - - // adjust to the next row of SBs - x->src.y_buffer += 32 * x->src.y_stride - 16 * offset; - x->src.u_buffer += 16 * x->src.uv_stride - 8 * offset; - x->src.v_buffer += 16 * x->src.uv_stride - 8 * offset; } cpi->tok_count = (unsigned int)(tp - cpi->tok); @@ -1580,78 +1442,150 @@ static int check_dual_ref_flags(VP9_COMP *cpi) { } } -static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { - VP9_COMMON *cm = &cpi->common; - int mb_row, mb_col, mis = cm->mode_info_stride, segment_id; - MODE_INFO *mi, *mi_ptr = cm->mi; +static void reset_skip_txfm_size_mb(VP9_COMP *cpi, + MODE_INFO *mi, TX_SIZE txfm_max) { + MB_MODE_INFO *const mbmi = &mi->mbmi; + + if (mbmi->txfm_size > txfm_max) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int segment_id = mbmi->segment_id; + + xd->mode_info_context = mi; + assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || + (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff)); + mbmi->txfm_size = txfm_max; + } +} + #if CONFIG_SUPERBLOCKS - int skip; - MODE_INFO *sb_mi_ptr = cm->mi, *sb_mi; - MB_MODE_INFO *sb_mbmi; +static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) { + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + if (!mi[y * mis + x].mbmi.mb_skip_coeff) + return 0; + } + } + + return 1; +} + +static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs, + TX_SIZE txfm_size) { + int x, y; + + for (y = 0; y < ymbs; y++) { + for (x = 0; x < xmbs; x++) { + mi[y * mis + x].mbmi.txfm_size = txfm_size; + } + } +} + +static void reset_skip_txfm_size_sb32(VP9_COMP *cpi, MODE_INFO *mi, + int mis, TX_SIZE txfm_max, + int mb_rows_left, int mb_cols_left) { + MB_MODE_INFO *const mbmi = &mi->mbmi; + + if (mbmi->txfm_size > txfm_max) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK 
*const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int segment_id = mbmi->segment_id; + const int ymbs = MIN(2, mb_rows_left); + const int xmbs = MIN(2, mb_cols_left); + + xd->mode_info_context = mi; + assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || + (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs))); + set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max); + } +} + +#if CONFIG_SUPERBLOCKS64 +static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi, + int mis, TX_SIZE txfm_max, + int mb_rows_left, int mb_cols_left) { + MB_MODE_INFO *const mbmi = &mi->mbmi; + + if (mbmi->txfm_size > txfm_max) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const int segment_id = mbmi->segment_id; + const int ymbs = MIN(4, mb_rows_left); + const int xmbs = MIN(4, mb_cols_left); + + xd->mode_info_context = mi; + assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || + (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs))); + set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max); + } +} #endif - MB_MODE_INFO *mbmi; - MACROBLOCK *x = &cpi->mb; - MACROBLOCKD *xd = &x->e_mbd; +#endif + +static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) { + VP9_COMMON *const cm = &cpi->common; + int mb_row, mb_col; + const int mis = cm->mode_info_stride; + MODE_INFO *mi, *mi_ptr = cm->mi; - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++, mi_ptr += mis) { + for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) { mi = mi_ptr; + for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) { +#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) { + reset_skip_txfm_size_sb64(cpi, mi, mis, txfm_max, + cm->mb_rows - mb_row, cm->mb_cols - mb_col); + } else +#endif // CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64 + { + int i; + + for (i = 0; i < 4; i++) { + const int x_idx_sb = (i & 1) << 1, y_idx_sb = i & 2; #if CONFIG_SUPERBLOCKS - sb_mi = sb_mi_ptr; + MODE_INFO *sb_mi = mi + y_idx_sb * mis + x_idx_sb; #endif - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++, mi++) { - mbmi = &mi->mbmi; + + if (mb_row + y_idx_sb >= cm->mb_rows || + mb_col + x_idx_sb >= cm->mb_cols) + continue; + #if CONFIG_SUPERBLOCKS - sb_mbmi = &sb_mi->mbmi; + if (sb_mi->mbmi.sb_type) { + reset_skip_txfm_size_sb32(cpi, sb_mi, mis, txfm_max, + cm->mb_rows - mb_row - y_idx_sb, + cm->mb_cols - mb_col - x_idx_sb); + } else #endif - if (mbmi->txfm_size > txfm_max) { + { + int m; + + for (m = 0; m < 4; m++) { + const int x_idx = x_idx_sb + (m & 1), y_idx = y_idx_sb + (m >> 1); + MODE_INFO *mb_mi; + + if (mb_col + x_idx >= cm->mb_cols || + mb_row + y_idx >= cm->mb_rows) + continue; + + mb_mi = mi + y_idx * mis + x_idx; #if CONFIG_SUPERBLOCKS - if (sb_mbmi->encoded_as_sb) { - if (!((mb_col & 1) || (mb_row & 1))) { - segment_id = mbmi->segment_id; - skip = mbmi->mb_skip_coeff; - if (mb_col < cm->mb_cols - 1) { - segment_id = segment_id && mi[1].mbmi.segment_id; - skip = skip && mi[1].mbmi.mb_skip_coeff; - } - if (mb_row < cm->mb_rows - 1) { - segment_id = segment_id && - mi[cm->mode_info_stride].mbmi.segment_id; - skip = skip && mi[cm->mode_info_stride].mbmi.mb_skip_coeff; - if (mb_col < cm->mb_cols - 1) { - segment_id = segment_id && - mi[cm->mode_info_stride + 1].mbmi.segment_id; - skip = skip && mi[cm->mode_info_stride + 1].mbmi.mb_skip_coeff; - } + assert(mb_mi->mbmi.sb_type == 
BLOCK_SIZE_MB16X16); +#endif + reset_skip_txfm_size_mb(cpi, mb_mi, txfm_max); } - xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || - (cm->mb_no_coeff_skip && skip)); - mbmi->txfm_size = txfm_max; - } else { - mbmi->txfm_size = sb_mbmi->txfm_size; } - } else { -#endif - segment_id = mbmi->segment_id; - xd->mode_info_context = mi; - assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0) || - (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff)); - mbmi->txfm_size = txfm_max; -#if CONFIG_SUPERBLOCKS } -#endif } -#if CONFIG_SUPERBLOCKS - if (mb_col & 1) - sb_mi += 2; -#endif } -#if CONFIG_SUPERBLOCKS - if (mb_row & 1) - sb_mi_ptr += 2 * mis; -#endif } } @@ -1961,7 +1895,7 @@ static void sum_intra_stats(VP9_COMP *cpi, MACROBLOCK *x) { #endif #if CONFIG_SUPERBLOCKS - if (xd->mode_info_context->mbmi.encoded_as_sb) { + if (xd->mode_info_context->mbmi.sb_type) { ++cpi->sb_ymode_count[m]; } else #endif @@ -2014,9 +1948,9 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, ENTROPY_CONTEXT_PLANES tl[4], TOKENEXTRA *t[4], TOKENEXTRA **tp, - int skip[4]) + int skip[4], int output_enabled) { - TOKENEXTRA tokens[4][16 * 24]; + TOKENEXTRA tokens[4][16 * 25]; int n_tokens[4], n; // if there were no skips, we don't need to do anything @@ -2056,7 +1990,7 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, if (skip[n]) { x->e_mbd.above_context = &ta[n]; x->e_mbd.left_context = &tl[n]; - vp9_stuff_mb(cpi, &x->e_mbd, tp, 0); + vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled); } else { if (n_tokens[n]) { memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); @@ -2065,22 +1999,135 @@ static void update_sb_skip_coeff_state(VP9_COMP *cpi, } } } + +#if CONFIG_SUPERBLOCKS64 +static void update_sb64_skip_coeff_state(VP9_COMP *cpi, + MACROBLOCK *x, + ENTROPY_CONTEXT_PLANES ta[16], + ENTROPY_CONTEXT_PLANES tl[16], + TOKENEXTRA *t[16], + TOKENEXTRA **tp, + int skip[16], int output_enabled) { + if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) { + TOKENEXTRA tokens[4][1024+512]; + int n_tokens[4], n; + + // if there were no skips, we don't need to do anything + if (!skip[0] && !skip[1] && !skip[2] && !skip[3]) + return; + + // if we don't do coeff skipping for this frame, we don't + // need to do anything here + if (!cpi->common.mb_no_coeff_skip) + return; + + // if all 4 MBs skipped coeff coding, nothing to be done + if (skip[0] && skip[1] && skip[2] && skip[3]) + return; + + // so the situation now is that we want to skip coeffs + // for some MBs, but not all, and we didn't code EOB + // coefficients for them. However, the skip flag for this + // SB will be 0 overall, so we need to insert EOBs in the + // middle of the token tree. Do so here. 
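/* [Illustrative note, not part of the patch.] Concretely: t[n] marks where
 * block n's tokens start in the shared TOKENEXTRA stream. The loop below
 * first saves each block's token run, then rewinds *tp to t[0] and replays
 * the runs in order, substituting stuffed EOB tokens (vp9_stuff_sb) for any
 * block whose skip flag was set after its tokens had already been emitted.
 * E.g. with skip = {0,1,0,0}, runs 0, 2 and 3 are copied back verbatim and
 * run 1 is replaced by EOBs, so the stream stays decodable with the
 * SB-level skip flag coded as 0. */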
+ for (n = 0; n < 4; n++) { + if (n < 3) { + n_tokens[n] = t[n + 1] - t[n]; + } else { + n_tokens[n] = *tp - t[3]; + } + if (n_tokens[n]) { + memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0])); + } + } + + // reset pointer, stuff EOBs where necessary + *tp = t[0]; + for (n = 0; n < 4; n++) { + if (skip[n]) { + x->e_mbd.above_context = &ta[n * 2]; + x->e_mbd.left_context = &tl[n * 2]; + vp9_stuff_sb(cpi, &x->e_mbd, tp, !output_enabled); + } else { + if (n_tokens[n]) { + memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); + } + (*tp) += n_tokens[n]; + } + } + } else { + TOKENEXTRA tokens[16][16 * 25]; + int n_tokens[16], n; + + // if there were no skips, we don't need to do anything + if (!skip[ 0] && !skip[ 1] && !skip[ 2] && !skip[ 3] && + !skip[ 4] && !skip[ 5] && !skip[ 6] && !skip[ 7] && + !skip[ 8] && !skip[ 9] && !skip[10] && !skip[11] && + !skip[12] && !skip[13] && !skip[14] && !skip[15]) + return; + + // if we don't do coeff skipping for this frame, we don't + // need to do anything here + if (!cpi->common.mb_no_coeff_skip) + return; + + // if all 16 MBs skipped coeff coding, nothing to be done + if (skip[ 0] && skip[ 1] && skip[ 2] && skip[ 3] && + skip[ 4] && skip[ 5] && skip[ 6] && skip[ 7] && + skip[ 8] && skip[ 9] && skip[10] && skip[11] && + skip[12] && skip[13] && skip[14] && skip[15]) + return; + + // so the situation now is that we want to skip coeffs + // for some MBs, but not all, and we didn't code EOB + // coefficients for them. However, the skip flag for this + // SB will be 0 overall, so we need to insert EOBs in the + // middle of the token tree. Do so here. + for (n = 0; n < 16; n++) { + if (n < 15) { + n_tokens[n] = t[n + 1] - t[n]; + } else { + n_tokens[n] = *tp - t[15]; + } + if (n_tokens[n]) { + memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0])); + } + } + + // reset pointer, stuff EOBs where necessary + *tp = t[0]; + for (n = 0; n < 16; n++) { + if (skip[n]) { + x->e_mbd.above_context = &ta[n]; + x->e_mbd.left_context = &tl[n]; + vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled); + } else { + if (n_tokens[n]) { + memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]); + } + (*tp) += n_tokens[n]; + } + } + } +} +#endif // CONFIG_SUPERBLOCKS64 #endif /* CONFIG_SUPERBLOCKS */ -static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int output_enabled, - int mb_col, int mb_row) { - VP9_COMMON *cm = &cpi->common; +static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, + int mb_row, int mb_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO * mbmi = &xd->mode_info_context->mbmi; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; unsigned char *segment_id = &mbmi->segment_id; int seg_ref_active; unsigned char ref_pred_flag; x->skip = 0; #if CONFIG_SUPERBLOCKS - assert(!xd->mode_info_context->mbmi.encoded_as_sb); + assert(!xd->mode_info_context->mbmi.sb_type); #endif #ifdef ENC_DEBUG @@ -2332,10 +2379,11 @@ static void encode_macroblock(VP9_COMP *cpi, MACROBLOCK *x, } #if CONFIG_SUPERBLOCKS -static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, - TOKENEXTRA **t, int recon_yoffset, - int recon_uvoffset, int mb_col, int mb_row) { +static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, int mb_row, int mb_col) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb;
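/* [Illustrative note, not part of the patch.] The encode_* helpers drop
 * their MACROBLOCK parameter and re-derive it from the compressor instance,
 * which keeps the growing 16/32/64 call tree uniform:
 *
 *   MACROBLOCK  *const x  = &cpi->mb;    // encoder-side block state
 *   MACROBLOCKD *const xd = &x->e_mbd;   // shared encode/decode state
 *
 * The added output_enabled flag distinguishes the final encode from RD trial
 * passes, so statistics and token counts are only accumulated once per block. */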
MACROBLOCKD *const xd = &x->e_mbd; const uint8_t *src = x->src.y_buffer; uint8_t *dst = xd->dst.y_buffer; @@ -2403,7 +2451,8 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp9_build_intra_predictors_sby_s(&x->e_mbd); vp9_build_intra_predictors_sbuv_s(&x->e_mbd); - sum_intra_stats(cpi, x); + if (output_enabled) + sum_intra_stats(cpi, x); } else { int ref_fb_idx; @@ -2461,7 +2510,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst); if (!x->skip) { - vp9_tokenize_sb(cpi, &x->e_mbd, t, 0); + vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); } else { int mb_skip_context = cpi->common.mb_no_coeff_skip ? @@ -2470,11 +2519,13 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, 0; mi->mbmi.mb_skip_coeff = 1; if (cm->mb_no_coeff_skip) { - cpi->skip_true_count[mb_skip_context]++; + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; vp9_fix_contexts_sb(xd); } else { - vp9_stuff_sb(cpi, xd, t, 0); - cpi->skip_false_count[mb_skip_context]++; + vp9_stuff_sb(cpi, xd, t, !output_enabled); + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; } } @@ -2493,7 +2544,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, for (n = 0; n < 4; n++) { int x_idx = n & 1, y_idx = n >> 1; - xd->left_context = cm->left_context + y_idx; + xd->left_context = cm->left_context + y_idx + (mb_row & 2); xd->above_context = cm->above_context + mb_col + x_idx; memcpy(&ta[n], xd->above_context, sizeof(ta[n])); memcpy(&tl[n], xd->left_context, sizeof(tl[n])); @@ -2520,7 +2571,7 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); if (!x->skip) { - vp9_tokenize_mb(cpi, &x->e_mbd, t, 0); + vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; } else { int mb_skip_context = @@ -2531,42 +2582,327 @@ static void encode_superblock(VP9_COMP *cpi, MACROBLOCK *x, xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; if (cpi->common.mb_no_coeff_skip) { // TODO(rbultje) this should be done per-sb instead of per-mb? - cpi->skip_true_count[mb_skip_context]++; + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; vp9_reset_mb_tokens_context(xd); } else { - vp9_stuff_mb(cpi, xd, t, 0); + vp9_stuff_mb(cpi, xd, t, !output_enabled); // TODO(rbultje) this should be done per-sb instead of per-mb? - cpi->skip_false_count[mb_skip_context]++; + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; } } } xd->mode_info_context = mi; - update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip); + update_sb_skip_coeff_state(cpi, x, ta, tl, tp, t, skip, output_enabled); #if CONFIG_TX32X32 } #endif - if (cm->txfm_mode == TX_MODE_SELECT && - !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || - (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && - vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { - cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; + if (output_enabled) { + if (cm->txfm_mode == TX_MODE_SELECT && + !((cm->mb_no_coeff_skip && skip[0] && skip[1] && skip[2] && skip[3]) || + (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { + cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; + } else { + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
+#if CONFIG_TX32X32 + TX_32X32 : +#else + TX_16X16 : +#endif + cm->txfm_mode; + mi->mbmi.txfm_size = sz; + if (mb_col < cm->mb_cols - 1) + mi[1].mbmi.txfm_size = sz; + if (mb_row < cm->mb_rows - 1) { + mi[mis].mbmi.txfm_size = sz; + if (mb_col < cm->mb_cols - 1) + mi[mis + 1].mbmi.txfm_size = sz; + } + } + } +} + +#if CONFIG_SUPERBLOCKS64 +static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t, + int recon_yoffset, int recon_uvoffset, + int output_enabled, int mb_row, int mb_col) { + VP9_COMMON *const cm = &cpi->common; + MACROBLOCK *const x = &cpi->mb; + MACROBLOCKD *const xd = &x->e_mbd; + const uint8_t *src = x->src.y_buffer; + uint8_t *dst = xd->dst.y_buffer; + const uint8_t *usrc = x->src.u_buffer; + uint8_t *udst = xd->dst.u_buffer; + const uint8_t *vsrc = x->src.v_buffer; + uint8_t *vdst = xd->dst.v_buffer; + int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; + int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + int seg_ref_active; + unsigned char ref_pred_flag; + int n; + TOKENEXTRA *tp[16]; + int skip[16]; + MODE_INFO *mi = x->e_mbd.mode_info_context; + unsigned int segment_id = mi->mbmi.segment_id; + ENTROPY_CONTEXT_PLANES ta[16], tl[16]; + const int mis = cm->mode_info_stride; + + x->skip = 0; + + if (cm->frame_type == KEY_FRAME) { + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { + adjust_act_zbin(cpi, x); + vp9_update_zbin_extra(cpi, x); + } + } else { + vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, cm); + + if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { + // Adjust the zbin based on this MB rate. + adjust_act_zbin(cpi, x); + } + + // Experimental code. Special case for gf and arf zeromv modes. + // Increase zbin size to suppress noise + cpi->zbin_mode_boost = 0; + if (cpi->zbin_mode_boost_enabled) { + if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { + if (xd->mode_info_context->mbmi.mode == ZEROMV) { + if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) + cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; + else + cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; + } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { + cpi->zbin_mode_boost = 0; + } else { + cpi->zbin_mode_boost = MV_ZBIN_BOOST; + } + } + } + + vp9_update_zbin_extra(cpi, x); + + seg_ref_active = vp9_segfeature_active(xd, segment_id, SEG_LVL_REF_FRAME); + + // SET VARIOUS PREDICTION FLAGS + + // Did the chosen reference frame match its predicted value. + ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame == + vp9_get_pred_ref(cm, xd))); + vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag); + } + + if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { + vp9_build_intra_predictors_sb64y_s(&x->e_mbd); + vp9_build_intra_predictors_sb64uv_s(&x->e_mbd); + if (output_enabled) + sum_intra_stats(cpi, x); } else { - TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? 
+ int ref_fb_idx; + + assert(cm->frame_type != KEY_FRAME); + + if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) + ref_fb_idx = cpi->common.lst_fb_idx; + else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) + ref_fb_idx = cpi->common.gld_fb_idx; + else + ref_fb_idx = cpi->common.alt_fb_idx; + + xd->pre.y_buffer = + cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; + xd->pre.u_buffer = + cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; + xd->pre.v_buffer = + cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; + + if (xd->mode_info_context->mbmi.second_ref_frame > 0) { + int second_ref_fb_idx; + + if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME) + second_ref_fb_idx = cpi->common.lst_fb_idx; + else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME) + second_ref_fb_idx = cpi->common.gld_fb_idx; + else + second_ref_fb_idx = cpi->common.alt_fb_idx; + + xd->second_pre.y_buffer = + cpi->common.yv12_fb[second_ref_fb_idx].y_buffer + recon_yoffset; + xd->second_pre.u_buffer = + cpi->common.yv12_fb[second_ref_fb_idx].u_buffer + recon_uvoffset; + xd->second_pre.v_buffer = + cpi->common.yv12_fb[second_ref_fb_idx].v_buffer + recon_uvoffset; + } + + vp9_build_inter64x64_predictors_sb(xd, xd->dst.y_buffer, + xd->dst.u_buffer, xd->dst.v_buffer, + xd->dst.y_stride, xd->dst.uv_stride); + } + +#if CONFIG_TX32X32 + if (xd->mode_info_context->mbmi.txfm_size == TX_32X32) { + int n; + + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; + + xd->mode_info_context = mi + x_idx * 2 + mis * y_idx * 2; + xd->left_context = cm->left_context + (y_idx << 1); + xd->above_context = cm->above_context + mb_col + (x_idx << 1); + memcpy(&ta[n * 2], xd->above_context, sizeof(*ta) * 2); + memcpy(&tl[n * 2], xd->left_context, sizeof(*tl) * 2); + tp[n] = *t; + xd->mode_info_context = mi + x_idx * 2 + y_idx * mis * 2; + vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, + src + x_idx * 32 + y_idx * 32 * src_y_stride, + src_y_stride, + dst + x_idx * 32 + y_idx * 32 * dst_y_stride, + dst_y_stride); + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, + vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, + src_uv_stride, + udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + dst_uv_stride); + vp9_transform_sby_32x32(x); + vp9_transform_sbuv_16x16(x); + vp9_quantize_sby_32x32(x); + vp9_quantize_sbuv_16x16(x); + // TODO(rbultje): trellis optimize + vp9_inverse_transform_sbuv_16x16(&x->e_mbd.sb_coeff_data); + vp9_inverse_transform_sby_32x32(&x->e_mbd.sb_coeff_data); + vp9_recon_sby_s_c(&x->e_mbd, + dst + 32 * x_idx + 32 * y_idx * dst_y_stride, + dst_y_stride); + vp9_recon_sbuv_s_c(&x->e_mbd, + udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride); + + if (!x->skip) { + vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled); + } else { + int mb_skip_context = cpi->common.mb_no_coeff_skip ? 
+ (mi - 1)->mbmi.mb_skip_coeff + + (mi - mis)->mbmi.mb_skip_coeff : 0; + xd->mode_info_context->mbmi.mb_skip_coeff = 1; + if (cm->mb_no_coeff_skip) { + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_fix_contexts_sb(xd); + } else { + vp9_stuff_sb(cpi, xd, t, !output_enabled); + if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; + } + } + + // copy skip flag on all mb_mode_info contexts in this SB + // if this was a skip at this txfm size + if (mb_col + x_idx * 2 < cm->mb_cols - 1) + mi[mis * y_idx * 2 + x_idx * 2 + 1].mbmi.mb_skip_coeff = + mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; + if (mb_row + y_idx * 2 < cm->mb_rows - 1) { + mi[mis * y_idx * 2 + x_idx * 2 + mis].mbmi.mb_skip_coeff = + mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; + if (mb_col + x_idx * 2 < cm->mb_cols - 1) + mi[mis * y_idx * 2 + x_idx * 2 + mis + 1].mbmi.mb_skip_coeff = + mi[mis * y_idx * 2 + x_idx * 2].mbmi.mb_skip_coeff; + } + skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; + } + } else +#endif + { + for (n = 0; n < 16; n++) { + const int x_idx = n & 3, y_idx = n >> 2; + + xd->left_context = cm->left_context + y_idx; + xd->above_context = cm->above_context + mb_col + x_idx; + memcpy(&ta[n], xd->above_context, sizeof(ta[n])); + memcpy(&tl[n], xd->left_context, sizeof(tl[n])); + tp[n] = *t; + xd->mode_info_context = mi + x_idx + y_idx * mis; + + vp9_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + vp9_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + vp9_fidct_mb(x); + vp9_recon_mby_s_c(&x->e_mbd, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride); + vp9_recon_mbuv_s_c(&x->e_mbd, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride); + + if (!x->skip) { + vp9_tokenize_mb(cpi, &x->e_mbd, t, !output_enabled); + skip[n] = xd->mode_info_context->mbmi.mb_skip_coeff; + } else { + int mb_skip_context = cpi->common.mb_no_coeff_skip ? + (x->e_mbd.mode_info_context - 1)->mbmi.mb_skip_coeff + + (x->e_mbd.mode_info_context - mis)->mbmi.mb_skip_coeff : 0; + xd->mode_info_context->mbmi.mb_skip_coeff = skip[n] = 1; + if (cpi->common.mb_no_coeff_skip) { + // TODO(rbultje) this should be done per-sb instead of per-mb? + if (output_enabled) + cpi->skip_true_count[mb_skip_context]++; + vp9_reset_mb_tokens_context(xd); + } else { + vp9_stuff_mb(cpi, xd, t, !output_enabled); + // TODO(rbultje) this should be done per-sb instead of per-mb? 
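/* [Illustrative sketch, not part of the patch.] mb_skip_context used above is
 * the usual left+above context: the two neighbours' skip flags are summed,
 * selecting one of three skip probabilities. Assuming mi points at the
 * current MODE_INFO and mis is the mode_info_stride:
 *
 *   static int skip_context(const MODE_INFO *mi, int mis) {
 *     return (mi - 1)->mbmi.mb_skip_coeff      // left neighbour
 *          + (mi - mis)->mbmi.mb_skip_coeff;   // above neighbour
 *   }
 */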
+ if (output_enabled) + cpi->skip_false_count[mb_skip_context]++; + } + } + } + } + + xd->mode_info_context = mi; + update_sb64_skip_coeff_state(cpi, x, ta, tl, tp, t, skip, output_enabled); + + if (output_enabled) { + if (cm->txfm_mode == TX_MODE_SELECT && + !((cm->mb_no_coeff_skip && + ((mi->mbmi.txfm_size == TX_32X32 && + skip[0] && skip[1] && skip[2] && skip[3]) || + (mi->mbmi.txfm_size != TX_32X32 && + skip[0] && skip[1] && skip[2] && skip[3] && + skip[4] && skip[5] && skip[6] && skip[7] && + skip[8] && skip[9] && skip[10] && skip[11] && + skip[12] && skip[13] && skip[14] && skip[15]))) || + (vp9_segfeature_active(xd, segment_id, SEG_LVL_EOB) && + vp9_get_segdata(xd, segment_id, SEG_LVL_EOB) == 0))) { + cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++; + } else { + int x, y; + TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? #if CONFIG_TX32X32 TX_32X32 : #else TX_16X16 : #endif cm->txfm_mode; - mi->mbmi.txfm_size = sz; - if (mb_col < cm->mb_cols - 1) - mi[1].mbmi.txfm_size = sz; - if (mb_row < cm->mb_rows - 1) { - mi[mis].mbmi.txfm_size = sz; - if (mb_col < cm->mb_cols - 1) - mi[mis + 1].mbmi.txfm_size = sz; + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) { + mi[mis * y + x].mbmi.txfm_size = sz; + } + } + } } } } +#endif // CONFIG_SUPERBLOCKS64 #endif diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 6bce1ad..38a2eab 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -17,6 +17,7 @@ #include #include #include "vp9/common/vp9_findnearmv.h" +#include "vp9/common/vp9_common.h" #ifdef ENTROPY_STATS static int mv_ref_ct [31] [4] [2]; @@ -241,9 +242,6 @@ void vp9_init3smotion_compensation(MACROBLOCK *x, int stride) { }, \ v = INT_MAX;) -#define MIN(x,y) (((x)<(y))?(x):(y)) -#define MAX(x,y) (((x)>(y))?(x):(y)) - int vp9_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 9b186c2..44f20ad 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -556,43 +556,19 @@ static void print_seg_map(VP9_COMP *cpi) { } static void update_reference_segmentation_map(VP9_COMP *cpi) { - VP9_COMMON *cm = &cpi->common; - int row, col, sb_rows = (cm->mb_rows + 1) >> 1, sb_cols = (cm->mb_cols + 1) >> 1; - MODE_INFO *mi = cm->mi; - uint8_t *segmap = cpi->segmentation_map; - uint8_t *segcache = cm->last_frame_seg_map; - - for (row = 0; row < sb_rows; row++) { - for (col = 0; col < sb_cols; col++) { - MODE_INFO *miptr = mi + col * 2; - uint8_t *cache = segcache + col * 2; -#if CONFIG_SUPERBLOCKS - if (miptr->mbmi.encoded_as_sb) { - cache[0] = miptr->mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[1] = miptr->mbmi.segment_id; - if (!(cm->mb_rows & 1) || row < sb_rows - 1) { - cache[cm->mb_cols] = miptr->mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[cm->mb_cols + 1] = miptr->mbmi.segment_id; - } - } else -#endif - { - cache[0] = miptr[0].mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[1] = miptr[1].mbmi.segment_id; - if (!(cm->mb_rows & 1) || row < sb_rows - 1) { - cache[cm->mb_cols] = miptr[cm->mode_info_stride].mbmi.segment_id; - if (!(cm->mb_cols & 1) || col < sb_cols - 1) - cache[1] = miptr[1].mbmi.segment_id; - cache[cm->mb_cols + 1] = miptr[cm->mode_info_stride + 1].mbmi.segment_id; - } - } + VP9_COMMON *const cm = &cpi->common; + int row, col; + MODE_INFO *mi, *mi_ptr = cm->mi; 
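/* [Illustrative note, not part of the patch.] With sb_type now stored in the
 * MB_MODE_INFO of every 16x16 unit a superblock covers, the reference
 * segmentation map no longer needs the 2x2 superblock special cases removed
 * above: the rewrite below simply walks the mi grid one MB at a time and
 * copies each segment_id straight into last_frame_seg_map, one byte per MB,
 * row by row. */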
+ uint8_t *cache_ptr = cm->last_frame_seg_map, *cache; + + for (row = 0; row < cm->mb_rows; row++) { + mi = mi_ptr; + cache = cache_ptr; + for (col = 0; col < cm->mb_cols; col++, mi++, cache++) { + cache[0] = mi->mbmi.segment_id; } - segmap += 2 * cm->mb_cols; - segcache += 2 * cm->mb_cols; - mi += 2 * cm->mode_info_stride; + mi_ptr += cm->mode_info_stride; + cache_ptr += cm->mb_cols; } } @@ -1788,7 +1764,10 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cm->prob_gf_coded = 128; cm->prob_intra_coded = 63; #if CONFIG_SUPERBLOCKS - cm->sb_coded = 200; + cm->sb32_coded = 200; +#if CONFIG_SUPERBLOCKS64 + cm->sb64_coded = 200; +#endif #endif for (i = 0; i < COMP_PRED_CONTEXTS; i++) cm->prob_comppred[i] = 128; @@ -1994,6 +1973,13 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_variance_halfpixvar32x32_h, vp9_variance_halfpixvar32x32_v, vp9_variance_halfpixvar32x32_hv, vp9_sad32x32x3, vp9_sad32x32x8, vp9_sad32x32x4d) + +#if CONFIG_SUPERBLOCKS64 + BFP(BLOCK_64X64, vp9_sad64x64, vp9_variance64x64, vp9_sub_pixel_variance64x64, + vp9_variance_halfpixvar64x64_h, vp9_variance_halfpixvar64x64_v, + vp9_variance_halfpixvar64x64_hv, vp9_sad64x64x3, vp9_sad64x64x8, + vp9_sad64x64x4d) +#endif #endif BFP(BLOCK_16X16, vp9_sad16x16, vp9_variance16x16, vp9_sub_pixel_variance16x16, diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 7c9181b..1142835 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -390,8 +390,15 @@ enum BlockSize { BLOCK_4X4 = PARTITIONING_4X4, BLOCK_16X16, BLOCK_MAX_SEGMENTS, +#if CONFIG_SUPERBLOCKS BLOCK_32X32 = BLOCK_MAX_SEGMENTS, +#if CONFIG_SUPERBLOCKS64 + BLOCK_64X64, +#endif // CONFIG_SUPERBLOCKS64 BLOCK_MAX_SB_SEGMENTS, +#else // CONFIG_SUPERBLOCKS + BLOCK_MAX_SB_SEGMENTS = BLOCK_MAX_SEGMENTS, +#endif // CONFIG_SUPERBLOCKS }; typedef struct VP9_COMP { @@ -571,7 +578,10 @@ typedef struct VP9_COMP { int cq_target_quality; #if CONFIG_SUPERBLOCKS - int sb_count; + int sb32_count[2]; +#if CONFIG_SUPERBLOCKS64 + int sb64_count[2]; +#endif int sb_ymode_count [VP9_I32X32_MODES]; #endif int ymode_count[VP9_YMODES]; /* intra MB type cts this frame */ diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 774b577..a79cb5a 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -41,6 +41,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9_rtcd.h" #include "vp9/common/vp9_mvref_common.h" +#include "vp9/common/vp9_common.h" #define MAXF(a,b) (((a) > (b)) ? 
(a) : (b)) @@ -926,14 +927,21 @@ static void copy_predictor(uint8_t *dst, const uint8_t *predictor) { #if CONFIG_SUPERBLOCKS #if CONFIG_TX32X32 -static int rdcost_sby_32x32(MACROBLOCK *x) { +static int rdcost_sby_32x32(MACROBLOCK *x, int backup) { MACROBLOCKD * const xd = &x->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) &t_above, - *tl = (ENTROPY_CONTEXT *) &t_left; + ENTROPY_CONTEXT *ta, *tl; - vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + ta = (ENTROPY_CONTEXT *) &t_above, + tl = (ENTROPY_CONTEXT *) &t_left; + + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + } else { + ta = (ENTROPY_CONTEXT *) xd->above_context; + tl = (ENTROPY_CONTEXT *) xd->left_context; + } return cost_coeffs(x, xd->block, PLANE_TYPE_Y_WITH_DC, ta, tl, TX_32X32); } @@ -953,7 +961,8 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, #define DEBUG_ERROR 0 static void super_block_yrd_32x32(MACROBLOCK *x, - int *rate, int *distortion, int *skippable) { + int *rate, int *distortion, int *skippable, + int backup) { SUPERBLOCK * const x_sb = &x->sb_coeff_data; MACROBLOCKD * const xd = &x->e_mbd; SUPERBLOCKD * const xd_sb = &xd->sb_coeff_data; @@ -976,7 +985,7 @@ static void super_block_yrd_32x32(MACROBLOCK *x, printf("IDCT/FDCT error 32x32: %d (d: %d)\n", vp9_block_error_c(x_sb->src_diff, out, 1024), *distortion); #endif - *rate = rdcost_sby_32x32(x); + *rate = rdcost_sby_32x32(x, backup); *skippable = vp9_sby_is_skippable_32x32(&x->e_mbd); } #endif @@ -1005,7 +1014,7 @@ static void super_block_yrd(VP9_COMP *cpi, #if CONFIG_TX32X32 vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, src, src_y_stride, dst, dst_y_stride); - super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); + super_block_yrd_32x32(x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32], 1); #endif #if DEBUG_ERROR @@ -1065,6 +1074,104 @@ static void super_block_yrd(VP9_COMP *cpi, xd->above_context = orig_above; xd->left_context = orig_left; } + +static void super_block_64_yrd(VP9_COMP *cpi, + MACROBLOCK *x, int *rate, int *distortion, + int *skip, + int64_t txfm_cache[NB_TXFM_MODES]) { + MACROBLOCKD *const xd = &x->e_mbd; + int r[TX_SIZE_MAX_SB][2], d[TX_SIZE_MAX_SB], s[TX_SIZE_MAX_SB], n; + const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; + int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; + ENTROPY_CONTEXT_PLANES t_above[TX_SIZE_MAX_SB][4], + *orig_above = xd->above_context; + ENTROPY_CONTEXT_PLANES t_left[TX_SIZE_MAX_SB][4], + *orig_left = xd->left_context; + + for (n = TX_4X4; n < TX_SIZE_MAX_SB; n++) { + vpx_memcpy(t_above[n], xd->above_context, sizeof(t_above[n])); + vpx_memcpy(t_left[n], xd->left_context, sizeof(t_left[n])); + r[n][0] = 0; + d[n] = 0; + s[n] = 1; + } + +#if CONFIG_TX32X32 + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; + int r_tmp, d_tmp, s_tmp; + + xd->above_context = &t_above[TX_32X32][x_idx << 1]; + xd->left_context = &t_left[TX_32X32][y_idx << 1]; + vp9_subtract_sby_s_c(x->sb_coeff_data.src_diff, + src + 32 * x_idx + 32 * y_idx * src_y_stride, + src_y_stride, + dst + 32 * x_idx + 32 * y_idx * dst_y_stride, + dst_y_stride); + super_block_yrd_32x32(x, &r_tmp, &d_tmp, &s_tmp, 0); + r[TX_32X32][0] += r_tmp; + d[TX_32X32] += d_tmp; + s[TX_32X32] = s[TX_32X32] && s_tmp; + } +#endif + +#if DEBUG_ERROR + 
int err[3] = { 0, 0, 0 }; +#endif + for (n = 0; n < 16; n++) { + int x_idx = n & 3, y_idx = n >> 2; + int r_tmp, d_tmp, s_tmp; + + vp9_subtract_mby_s_c(x->src_diff, + src + x_idx * 16 + y_idx * 16 * src_y_stride, + src_y_stride, + dst + x_idx * 16 + y_idx * 16 * dst_y_stride, + dst_y_stride); + + xd->above_context = &t_above[TX_16X16][x_idx]; + xd->left_context = &t_left[TX_16X16][y_idx]; + macro_block_yrd_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0); + d[TX_16X16] += d_tmp; + r[TX_16X16][0] += r_tmp; + s[TX_16X16] = s[TX_16X16] && s_tmp; +#if DEBUG_ERROR + vp9_inverse_transform_mby_16x16(xd); + err[2] += vp9_block_error_c(xd->diff, x->src_diff, 256); +#endif + + xd->above_context = &t_above[TX_4X4][x_idx]; + xd->left_context = &t_left[TX_4X4][y_idx]; + macro_block_yrd_4x4(x, &r_tmp, &d_tmp, &s_tmp, 0); + d[TX_4X4] += d_tmp; + r[TX_4X4][0] += r_tmp; + s[TX_4X4] = s[TX_4X4] && s_tmp; +#if DEBUG_ERROR + vp9_inverse_transform_mby_4x4(xd); + err[0] += vp9_block_error_c(xd->diff, x->src_diff, 256); +#endif + + xd->above_context = &t_above[TX_8X8][x_idx]; + xd->left_context = &t_left[TX_8X8][y_idx]; + macro_block_yrd_8x8(x, &r_tmp, &d_tmp, &s_tmp, 0); + d[TX_8X8] += d_tmp; + r[TX_8X8][0] += r_tmp; + s[TX_8X8] = s[TX_8X8] && s_tmp; +#if DEBUG_ERROR + vp9_inverse_transform_mby_8x8(xd); + err[1] += vp9_block_error_c(xd->diff, x->src_diff, 256); +#endif + } +#if DEBUG_ERROR + printf("IDCT/FDCT error 16x16: %d (d: %d)\n", err[2], d[2]); + printf("IDCT/FDCT error 8x8: %d (d: %d)\n", err[1], d[1]); + printf("IDCT/FDCT error 4x4: %d (d: %d)\n", err[0], d[0]); +#endif + choose_txfm_size_from_rd(cpi, x, r, rate, d, distortion, s, skip, txfm_cache, + TX_SIZE_MAX_SB - 1); + + xd->above_context = orig_above; + xd->left_context = orig_left; +} #endif static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { @@ -1359,6 +1466,48 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, return best_rd; } + +#if CONFIG_SUPERBLOCKS64 +static int64_t rd_pick_intra_sb64y_mode(VP9_COMP *cpi, + MACROBLOCK *x, + int *rate, + int *rate_tokenonly, + int *distortion, + int *skippable, + int64_t txfm_cache[NB_TXFM_MODES]) { + MB_PREDICTION_MODE mode; + MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + int this_rate, this_rate_tokenonly; + int this_distortion, s; + int64_t best_rd = INT64_MAX, this_rd; + + /* Y Search for 32x32 intra prediction mode */ + for (mode = DC_PRED; mode <= TM_PRED; mode++) { + x->e_mbd.mode_info_context->mbmi.mode = mode; + vp9_build_intra_predictors_sb64y_s(&x->e_mbd); + + super_block_64_yrd(cpi, x, &this_rate_tokenonly, + &this_distortion, &s, txfm_cache); + this_rate = this_rate_tokenonly + + x->mbmode_cost[x->e_mbd.frame_type] + [x->e_mbd.mode_info_context->mbmi.mode]; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < best_rd) { + mode_selected = mode; + best_rd = this_rd; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + + x->e_mbd.mode_info_context->mbmi.mode = mode_selected; + + return best_rd; +} +#endif // CONFIG_SUPERBLOCKS64 #endif static int64_t rd_pick_intra16x16mby_mode(VP9_COMP *cpi, @@ -1735,18 +1884,23 @@ static int64_t rd_inter16x16_uv_8x8(VP9_COMP *cpi, MACROBLOCK *x, int *rate, #if CONFIG_SUPERBLOCKS #if CONFIG_TX32X32 -static int rd_cost_sbuv_16x16(MACROBLOCK *x) { +static int rd_cost_sbuv_16x16(MACROBLOCK *x, int backup) { int b; int cost = 0; MACROBLOCKD *const xd = &x->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; - 
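/* [Illustrative sketch, not part of the patch.] Several rd_cost_* helpers
 * gain a 'backup' flag: when set they cost coefficients against a scratch
 * copy of the entropy contexts (leaving xd untouched for further trials);
 * when clear they advance the real contexts in place, which the 64x64 loops
 * rely on as they sweep sub-blocks left to right. The pattern:
 *
 *   ENTROPY_CONTEXT_PLANES t_above, t_left;  // scratch copies
 *   if (backup) {
 *     vpx_memcpy(&t_above, xd->above_context, sizeof(t_above));
 *     vpx_memcpy(&t_left, xd->left_context, sizeof(t_left));
 *     ta = (ENTROPY_CONTEXT *)&t_above;  tl = (ENTROPY_CONTEXT *)&t_left;
 *   } else {
 *     ta = (ENTROPY_CONTEXT *)xd->above_context;  // mutate in place
 *     tl = (ENTROPY_CONTEXT *)xd->left_context;
 *   }
 */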
vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); - vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); + if (backup) { + vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); + vpx_memcpy(&t_left, xd->left_context, sizeof(ENTROPY_CONTEXT_PLANES)); - ta = (ENTROPY_CONTEXT *) &t_above; - tl = (ENTROPY_CONTEXT *) &t_left; + ta = (ENTROPY_CONTEXT *) &t_above; + tl = (ENTROPY_CONTEXT *) &t_left; + } else { + ta = (ENTROPY_CONTEXT *)xd->above_context; + tl = (ENTROPY_CONTEXT *)xd->left_context; + } for (b = 16; b < 24; b += 4) cost += cost_coeffs(x, xd->block + b, PLANE_TYPE_UV, @@ -1757,13 +1911,14 @@ static int rd_cost_sbuv_16x16(MACROBLOCK *x) { } static void rd_inter32x32_uv_16x16(MACROBLOCK *x, int *rate, - int *distortion, int *skip) { + int *distortion, int *skip, + int backup) { MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sbuv_16x16(x); vp9_quantize_sbuv_16x16(x); - *rate = rd_cost_sbuv_16x16(x); + *rate = rd_cost_sbuv_16x16(x, backup); *distortion = vp9_block_error_c(x->sb_coeff_data.coeff + 1024, xd->sb_coeff_data.dqcoeff + 1024, 512) >> 2; *skip = vp9_sbuv_is_skippable_16x16(xd); @@ -1783,7 +1938,7 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); - rd_inter32x32_uv_16x16(x, rate, distortion, skip); + rd_inter32x32_uv_16x16(x, rate, distortion, skip, 1); } else { #endif int n, r = 0, d = 0; @@ -1833,6 +1988,14 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } + +static void super_block_64_uvrd(MACROBLOCK *x, int *rate, + int *distortion, int *skip); +static int64_t rd_inter64x64_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, + int *distortion, int fullpixel, int *skip) { + super_block_64_uvrd(x, rate, distortion, skip); + return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); +} #endif static int64_t rd_inter4x4_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -1984,13 +2147,13 @@ static void super_block_uvrd(MACROBLOCK *x, vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, usrc, vsrc, src_uv_stride, udst, vdst, dst_uv_stride); - rd_inter32x32_uv_16x16(x, rate, distortion, skippable); + rd_inter32x32_uv_16x16(x, rate, distortion, skippable, 1); } else { #endif int d = 0, r = 0, n, s = 1; ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT_PLANES *ta = xd->above_context; - ENTROPY_CONTEXT_PLANES *tl = xd->left_context; + ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context; + ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context; memcpy(t_above, xd->above_context, sizeof(t_above)); memcpy(t_left, xd->left_context, sizeof(t_left)); @@ -2016,24 +2179,107 @@ static void super_block_uvrd(MACROBLOCK *x, } d += vp9_mbuverror(x) >> 2; - xd->above_context = ta + x_idx; - xd->left_context = tl + y_idx; - r += rd_cost_mbuv_8x8(x, 0); + xd->above_context = t_above + x_idx; + xd->left_context = t_left + y_idx; + if (mbmi->txfm_size == TX_4X4) { + r += rd_cost_mbuv_4x4(x, 0); + } else { + r += rd_cost_mbuv_8x8(x, 0); + } } - xd->above_context = ta; - xd->left_context = tl; + xd->above_context = ta_orig; + xd->left_context = tl_orig; + *distortion = d; *rate = r; *skippable = s; +#if CONFIG_TX32X32 + } +#endif +} - xd->left_context = tl; - xd->above_context = ta; - memcpy(xd->above_context, t_above, sizeof(t_above)); - memcpy(xd->left_context, t_left, sizeof(t_left)); +static void super_block_64_uvrd(MACROBLOCK *x, + int 
*rate, + int *distortion, + int *skippable) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi; + const uint8_t *usrc = x->src.u_buffer, *udst = xd->dst.u_buffer; + const uint8_t *vsrc = x->src.v_buffer, *vdst = xd->dst.v_buffer; + int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride; + ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; + ENTROPY_CONTEXT_PLANES *ta_orig = xd->above_context; + ENTROPY_CONTEXT_PLANES *tl_orig = xd->left_context; + int d = 0, r = 0, n, s = 1; + + memcpy(t_above, xd->above_context, sizeof(t_above)); + memcpy(t_left, xd->left_context, sizeof(t_left)); + +#if CONFIG_TX32X32 + if (mbmi->txfm_size == TX_32X32) { + int n; + + *rate = 0; + for (n = 0; n < 4; n++) { + int x_idx = n & 1, y_idx = n >> 1; + int r_tmp, d_tmp, s_tmp; + + vp9_subtract_sbuv_s_c(x->sb_coeff_data.src_diff, + usrc + x_idx * 16 + y_idx * 16 * src_uv_stride, + vsrc + x_idx * 16 + y_idx * 16 * src_uv_stride, + src_uv_stride, + udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, + dst_uv_stride); + xd->above_context = t_above + x_idx * 2; + xd->left_context = t_left + y_idx * 2; + rd_inter32x32_uv_16x16(x, &r_tmp, &d_tmp, &s_tmp, 0); + r += r_tmp; + d += d_tmp; + s = s && s_tmp; + } + } else { +#endif + for (n = 0; n < 16; n++) { + int x_idx = n & 3, y_idx = n >> 2; + + vp9_subtract_mbuv_s_c(x->src_diff, + usrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + vsrc + x_idx * 8 + y_idx * 8 * src_uv_stride, + src_uv_stride, + udst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + vdst + x_idx * 8 + y_idx * 8 * dst_uv_stride, + dst_uv_stride); + if (mbmi->txfm_size == TX_4X4) { + vp9_transform_mbuv_4x4(x); + vp9_quantize_mbuv_4x4(x); + s &= vp9_mbuv_is_skippable_4x4(xd); + } else { + vp9_transform_mbuv_8x8(x); + vp9_quantize_mbuv_8x8(x); + s &= vp9_mbuv_is_skippable_8x8(xd); + } + + xd->above_context = t_above + x_idx; + xd->left_context = t_left + y_idx; + d += vp9_mbuverror(x) >> 2; + if (mbmi->txfm_size == TX_4X4) { + r += rd_cost_mbuv_4x4(x, 0); + } else { + r += rd_cost_mbuv_8x8(x, 0); + } + } #if CONFIG_TX32X32 } #endif + + *distortion = d; + *rate = r; + *skippable = s; + + xd->left_context = tl_orig; + xd->above_context = ta_orig; } static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, @@ -2072,6 +2318,45 @@ static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, return best_rd; } + +#if CONFIG_SUPERBLOCKS64 +static int64_t rd_pick_intra_sb64uv_mode(VP9_COMP *cpi, + MACROBLOCK *x, + int *rate, + int *rate_tokenonly, + int *distortion, + int *skippable) { + MB_PREDICTION_MODE mode; + MB_PREDICTION_MODE UNINITIALIZED_IS_SAFE(mode_selected); + int64_t best_rd = INT64_MAX, this_rd; + int this_rate_tokenonly, this_rate; + int this_distortion, s; + + for (mode = DC_PRED; mode <= TM_PRED; mode++) { + x->e_mbd.mode_info_context->mbmi.uv_mode = mode; + vp9_build_intra_predictors_sb64uv_s(&x->e_mbd); + + super_block_64_uvrd(x, &this_rate_tokenonly, + &this_distortion, &s); + this_rate = this_rate_tokenonly + + x->intra_uv_mode_cost[x->e_mbd.frame_type][mode]; + this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); + + if (this_rd < best_rd) { + mode_selected = mode; + best_rd = this_rd; + *rate = this_rate; + *rate_tokenonly = this_rate_tokenonly; + *distortion = this_distortion; + *skippable = s; + } + } + + x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected; + + return best_rd; +} +#endif // CONFIG_SUPERBLOCKS64 #endif int vp9_cost_mv_ref(VP9_COMP *cpi, @@ -3161,8 +3446,6 @@ static void 
inter_mode_cost(VP9_COMP *cpi, MACROBLOCK *x, *skippable = y_skippable && uv_skippable; } -#define MIN(x,y) (((x)<(y))?(x):(y)) -#define MAX(x,y) (((x)>(y))?(x):(y)) static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, int idx, MV_REFERENCE_FRAME frame_type, int block_size, @@ -3367,23 +3650,35 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } #endif - if (block_size == BLOCK_16X16) { - vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); - if (is_comp_pred) - vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16); -#if CONFIG_COMP_INTERINTRA_PRED - if (is_comp_interintra_pred) { - vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16); - } -#endif - } else { #if CONFIG_SUPERBLOCKS +#if CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_64X64) { + vp9_build_inter64x64_predictors_sb(xd, + xd->dst.y_buffer, + xd->dst.u_buffer, + xd->dst.v_buffer, + xd->dst.y_stride, + xd->dst.uv_stride); + } else +#endif // CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_32X32) { vp9_build_inter32x32_predictors_sb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); + } else +#endif // CONFIG_SUPERBLOCKS + { + assert(block_size == BLOCK_16X16); + vp9_build_1st_inter16x16_predictors_mby(xd, xd->predictor, 16, 0); + if (is_comp_pred) + vp9_build_2nd_inter16x16_predictors_mby(xd, xd->predictor, 16); +#if CONFIG_COMP_INTERINTRA_PRED + if (is_comp_interintra_pred) { + vp9_build_interintra_16x16_predictors_mby(xd, xd->predictor, 16); } #endif } @@ -3397,14 +3692,22 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, if (threshold < x->encode_breakout) threshold = x->encode_breakout; - if (block_size == BLOCK_16X16) { - var = vp9_variance16x16(*(b->base_src), b->src_stride, - xd->predictor, 16, &sse); - } else { #if CONFIG_SUPERBLOCKS +#if CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_64X64) { + var = vp9_variance64x64(*(b->base_src), b->src_stride, + xd->dst.y_buffer, xd->dst.y_stride, &sse); + } else +#endif // CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_32X32) { var = vp9_variance32x32(*(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride, &sse); -#endif + } else +#endif // CONFIG_SUPERBLOCKS + { + assert(block_size == BLOCK_16X16); + var = vp9_variance16x16(*(b->base_src), b->src_stride, + xd->predictor, 16, &sse); } if ((int)sse < threshold) { @@ -3416,15 +3719,29 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Check u and v to make sure skip is ok int sse2; - if (block_size == BLOCK_16X16) { - sse2 = vp9_uvsse(x); - } else { +#if CONFIG_SUPERBLOCKS +#if CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_64X64) { + unsigned int sse2u, sse2v; + var = vp9_variance32x32(x->src.u_buffer, x->src.uv_stride, + xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); + var = vp9_variance32x32(x->src.v_buffer, x->src.uv_stride, + xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); + sse2 = sse2u + sse2v; + } else +#endif // CONFIG_SUPERBLOCKS64 + if (block_size == BLOCK_32X32) { unsigned int sse2u, sse2v; var = vp9_variance16x16(x->src.u_buffer, x->src.uv_stride, xd->dst.u_buffer, xd->dst.uv_stride, &sse2u); var = vp9_variance16x16(x->src.v_buffer, x->src.uv_stride, xd->dst.v_buffer, xd->dst.uv_stride, &sse2v); sse2 = sse2u + sse2v; + } else +#endif // CONFIG_SUPERBLOCKS + { + assert(block_size == BLOCK_16X16); + sse2 = vp9_uvsse(x); } if (sse2 * 2 < threshold) { @@ -3455,23 +3772,26 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } if (!x->skip) { - if (block_size ==
-      vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
-                                               &xd->predictor[320], 8);
-      if (is_comp_pred)
-        vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
-                                                 &xd->predictor[320], 8);
-#if CONFIG_COMP_INTERINTRA_PRED
-      if (is_comp_interintra_pred) {
-        vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
-                                                   &xd->predictor[320], 8);
-      }
-#endif
-      inter_mode_cost(cpi, x, rate2, distortion,
-                      rate_y, distortion_y, rate_uv, distortion_uv,
-                      skippable, txfm_cache);
-    } else {
 #if CONFIG_SUPERBLOCKS
+#if CONFIG_SUPERBLOCKS64
+    if (block_size == BLOCK_64X64) {
+      int skippable_y, skippable_uv;
+
+      // Y cost and distortion
+      super_block_64_yrd(cpi, x, rate_y, distortion_y,
+                         &skippable_y, txfm_cache);
+      *rate2 += *rate_y;
+      *distortion += *distortion_y;
+
+      rd_inter64x64_uv(cpi, x, rate_uv, distortion_uv,
+                       cm->full_pixel, &skippable_uv);
+
+      *rate2 += *rate_uv;
+      *distortion += *distortion_uv;
+      *skippable = skippable_y && skippable_uv;
+    } else
+#endif  // CONFIG_SUPERBLOCKS64
+    if (block_size == BLOCK_32X32) {
       int skippable_y, skippable_uv;
 
       // Y cost and distortion
@@ -3486,7 +3806,25 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       *rate2 += *rate_uv;
       *distortion += *distortion_uv;
       *skippable = skippable_y && skippable_uv;
+    } else
+#endif  // CONFIG_SUPERBLOCKS
+    {
+      assert(block_size == BLOCK_16X16);
+
+      vp9_build_1st_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+                                               &xd->predictor[320], 8);
+      if (is_comp_pred)
+        vp9_build_2nd_inter16x16_predictors_mbuv(xd, &xd->predictor[256],
+                                                 &xd->predictor[320], 8);
+#if CONFIG_COMP_INTERINTRA_PRED
+      if (is_comp_interintra_pred) {
+        vp9_build_interintra_16x16_predictors_mbuv(xd, &xd->predictor[256],
+                                                   &xd->predictor[320], 8);
+      }
 #endif
+      inter_mode_cost(cpi, x, rate2, distortion,
+                      rate_y, distortion_y, rate_uv, distortion_uv,
+                      skippable, txfm_cache);
     }
   }
   return this_rd;  // if 0, this will be re-calculated by caller
@@ -3554,7 +3892,8 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   vpx_memset(&frame_mv, 0, sizeof(frame_mv));
   vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
   vpx_memset(&best_bmodes, 0, sizeof(best_bmodes));
-  vpx_memset(&x->mb_context[xd->mb_index], 0, sizeof(PICK_MODE_CONTEXT));
+  vpx_memset(&x->mb_context[xd->sb_index][xd->mb_index], 0,
+             sizeof(PICK_MODE_CONTEXT));
 
   for (i = 0; i < MAX_REF_FRAMES; i++)
     frame_mv[NEWMV][i].as_int = INVALID_MV;
@@ -3787,7 +4126,7 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
 #if CONFIG_COMP_INTRA_PRED
                                          0,
 #endif
-                                         0);
+                                         cpi->update_context);
 
       rate2 += rate;
       distortion2 += distortion;
@@ -4298,18 +4637,18 @@ static void rd_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
 end:
-  store_coding_context(
-      x, &x->mb_context[xd->mb_index], best_mode_index, &best_partition,
-      &mbmi->ref_mvs[mbmi->ref_frame][0],
-      &mbmi->ref_mvs[mbmi->second_ref_frame < 0
-                     ? 0 : mbmi->second_ref_frame][0],
-      best_pred_diff, best_txfm_diff);
+  store_coding_context(x, &x->mb_context[xd->sb_index][xd->mb_index],
+                       best_mode_index, &best_partition,
+                       &mbmi->ref_mvs[mbmi->ref_frame][0],
+                       &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
+                                      mbmi->second_ref_frame][0],
+                       best_pred_diff, best_txfm_diff);
 }
 
 #if CONFIG_SUPERBLOCKS
-void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
-                               int *returnrate,
-                               int *returndist) {
+void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
+                                 int *returnrate,
+                                 int *returndist) {
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   int rate_y, rate_uv;
@@ -4335,6 +4674,37 @@ void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     *returndist = dist_y + (dist_uv >> 2);
   }
 }
+
+#if CONFIG_SUPERBLOCKS64
+void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
+                                 int *returnrate,
+                                 int *returndist) {
+  VP9_COMMON *cm = &cpi->common;
+  MACROBLOCKD *xd = &x->e_mbd;
+  int rate_y, rate_uv;
+  int rate_y_tokenonly, rate_uv_tokenonly;
+  int error_y, error_uv;
+  int dist_y, dist_uv;
+  int y_skip, uv_skip;
+  int64_t txfm_cache[NB_TXFM_MODES];
+
+  error_y = rd_pick_intra_sb64y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
+                                     &dist_y, &y_skip, txfm_cache);
+  error_uv = rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
+                                       &dist_uv, &uv_skip);
+
+  if (cpi->common.mb_no_coeff_skip && y_skip && uv_skip) {
+    *returnrate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
+                  vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
+    *returndist = dist_y + (dist_uv >> 2);
+  } else {
+    *returnrate = rate_y + rate_uv;
+    if (cm->mb_no_coeff_skip)
+      *returnrate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
+    *returndist = dist_y + (dist_uv >> 2);
+  }
+}
+#endif
 #endif
 
 void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
@@ -4409,11 +4779,12 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
 #if CONFIG_COMP_INTRA_PRED
                                        0,
 #endif
-                                       0);
+                                       cpi->update_context);
 #if CONFIG_COMP_INTRA_PRED
   error4x4d = rd_pick_intra4x4mby_modes(cpi, x,
                                         &rate4x4d, &rate4x4_tokenonly,
-                                        &dist4x4d, error16x16, 1, 0);
+                                        &dist4x4d, error16x16, 1,
+                                        cpi->update_context);
 #endif
 
   mbmi->mb_skip_coeff = 0;
@@ -4426,8 +4797,8 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
            vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 1);
     dist = dist16x16 + (distuv8x8 >> 2);
     mbmi->txfm_size = txfm_size_16x16;
-    memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
-           sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+    memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
+           sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
   } else if (error8x8 > error16x16) {
     if (error4x4 < error16x16) {
       rate = rateuv;
@@ -4444,15 +4815,16 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
       mbmi->mode = B_PRED;
       mbmi->txfm_size = TX_4X4;
       dist = dist4x4 + (distuv >> 2);
-      memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
-             sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+      memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
+             sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
     } else {
       mbmi->txfm_size = txfm_size_16x16;
       mbmi->mode = mode16x16;
       rate = rate16x16 + rateuv8x8;
       dist = dist16x16 + (distuv8x8 >> 2);
       for (i = 0; i < NB_TXFM_MODES; i++) {
-        x->mb_context[xd->mb_index].txfm_rd_diff[i] = error16x16 - txfm_cache[i];
+        x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff[i] =
+            error16x16 - txfm_cache[i];
       }
     }
     if (cpi->common.mb_no_coeff_skip)
@@ -4473,8 +4845,8 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
       mbmi->mode = B_PRED;
       mbmi->txfm_size = TX_4X4;
       dist = dist4x4 + (distuv >> 2);
-      memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
-             sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+      memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
+             sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
     } else {
       // FIXME(rbultje) support transform-size selection
       mbmi->mode = I8X8_PRED;
@@ -4482,8 +4854,8 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
       set_i8x8_block_modes(x, mode8x8);
       rate = rate8x8 + rateuv;
       dist = dist8x8 + (distuv >> 2);
-      memset(x->mb_context[xd->mb_index].txfm_rd_diff, 0,
-             sizeof(x->mb_context[xd->mb_index].txfm_rd_diff));
+      memset(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff, 0,
+             sizeof(x->mb_context[xd->sb_index][xd->mb_index].txfm_rd_diff));
     }
     if (cpi->common.mb_no_coeff_skip)
       rate += vp9_cost_bit(vp9_get_pred_prob(cm, xd, PRED_MBSKIP), 0);
@@ -4494,9 +4866,11 @@ void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
 }
 
 #if CONFIG_SUPERBLOCKS
-int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
-                                  int recon_yoffset, int recon_uvoffset,
-                                  int *returnrate, int *returndistortion) {
+static int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
+                                         int recon_yoffset, int recon_uvoffset,
+                                         int *returnrate,
+                                         int *returndistortion,
+                                         int block_size) {
   VP9_COMMON *cm = &cpi->common;
   MACROBLOCKD *xd = &x->e_mbd;
   MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
@@ -4556,7 +4930,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
-      setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, BLOCK_32X32,
+      setup_buffer_inter(cpi, x, idx_list[ref_frame], ref_frame, block_size,
                          recon_yoffset, recon_uvoffset, frame_mv[NEARESTMV],
                          frame_mv[NEARMV], frame_mdcounts,
                          y_buffer, u_buffer, v_buffer);
@@ -4565,27 +4939,56 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
      frame_mv[ZEROMV][ref_frame].as_int = 0;
   }
 
-  mbmi->mode = DC_PRED;
-  if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) {
-    mbmi->txfm_size = TX_4X4;
-    rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4,
-                            &dist_uv_4x4, &uv_skip_4x4);
-    mode_uv_4x4 = mbmi->uv_mode;
-  }
-  if (cm->txfm_mode != ONLY_4X4) {
-    mbmi->txfm_size = TX_8X8;
-    rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8,
-                            &dist_uv_8x8, &uv_skip_8x8);
-    mode_uv_8x8 = mbmi->uv_mode;
-  }
+#if CONFIG_SUPERBLOCKS64
+  if (block_size == BLOCK_64X64) {
+    mbmi->mode = DC_PRED;
+    if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) {
+      mbmi->txfm_size = TX_4X4;
+      rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4,
+                                &dist_uv_4x4, &uv_skip_4x4);
+      mode_uv_4x4 = mbmi->uv_mode;
+    }
+    if (cm->txfm_mode != ONLY_4X4) {
+      mbmi->txfm_size = TX_8X8;
+      rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8,
+                                &dist_uv_8x8, &uv_skip_8x8);
+      mode_uv_8x8 = mbmi->uv_mode;
+    }
 #if CONFIG_TX32X32
-  if (cm->txfm_mode >= ALLOW_32X32) {
-    mbmi->txfm_size = TX_32X32;
-    rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16,
-                            &dist_uv_16x16, &uv_skip_16x16);
-    mode_uv_16x16 = mbmi->uv_mode;
+    if (cm->txfm_mode >= ALLOW_32X32) {
+      mbmi->txfm_size = TX_32X32;
+      rd_pick_intra_sb64uv_mode(cpi, x, &rate_uv_16x16,
+                                &rate_uv_tokenonly_16x16,
+                                &dist_uv_16x16, &uv_skip_16x16);
+      mode_uv_16x16 = mbmi->uv_mode;
+    }
+#endif  // CONFIG_TX32X32
+  } else
+#endif  // CONFIG_SUPERBLOCKS64
+  {
+    assert(block_size == BLOCK_32X32);
+    mbmi->mode = DC_PRED;
+    if (cm->txfm_mode == ONLY_4X4 || cm->txfm_mode == TX_MODE_SELECT) {
+      mbmi->txfm_size = TX_4X4;
+      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_4x4, &rate_uv_tokenonly_4x4,
+                              &dist_uv_4x4, &uv_skip_4x4);
+      mode_uv_4x4 = mbmi->uv_mode;
+    }
+    if (cm->txfm_mode != ONLY_4X4) {
+      mbmi->txfm_size = TX_8X8;
+      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_8x8, &rate_uv_tokenonly_8x8,
+                              &dist_uv_8x8, &uv_skip_8x8);
+      mode_uv_8x8 = mbmi->uv_mode;
+    }
+#if CONFIG_TX32X32
+    if (cm->txfm_mode >= ALLOW_32X32) {
+      mbmi->txfm_size = TX_32X32;
+      rd_pick_intra_sbuv_mode(cpi, x, &rate_uv_16x16, &rate_uv_tokenonly_16x16,
+                              &dist_uv_16x16, &uv_skip_16x16);
+      mode_uv_16x16 = mbmi->uv_mode;
+    }
+#endif  // CONFIG_TX32X32
   }
-#endif
 
   for (mode_index = 0; mode_index < MAX_MODES;
        mode_index += (!switchable_filter_index)) {
@@ -4713,9 +5116,19 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
     }
 
     if (ref_frame == INTRA_FRAME) {
-      vp9_build_intra_predictors_sby_s(xd);
-      super_block_yrd(cpi, x, &rate_y, &distortion_y,
-                      &skippable, txfm_cache);
+#if CONFIG_SUPERBLOCKS64
+      if (block_size == BLOCK_64X64) {
+        vp9_build_intra_predictors_sb64y_s(xd);
+        super_block_64_yrd(cpi, x, &rate_y, &distortion_y,
+                           &skippable, txfm_cache);
+      } else
+#endif  // CONFIG_SUPERBLOCKS64
+      {
+        assert(block_size == BLOCK_32X32);
+        vp9_build_intra_predictors_sby_s(xd);
+        super_block_yrd(cpi, x, &rate_y, &distortion_y,
+                        &skippable, txfm_cache);
+      }
       if (mbmi->txfm_size == TX_4X4) {
         rate_uv = rate_uv_4x4;
         distortion_uv = dist_uv_4x4;
@@ -4727,7 +5140,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
         distortion_uv = dist_uv_16x16;
         skippable = skippable && uv_skip_16x16;
         mbmi->uv_mode = mode_uv_16x16;
-#endif
+#endif  // CONFIG_TX32X32
       } else {
         rate_uv = rate_uv_8x8;
         distortion_uv = dist_uv_8x8;
@@ -4749,7 +5162,7 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
 #endif
       }
 #endif
-      this_rd = handle_inter_mode(cpi, x, BLOCK_32X32,
+      this_rd = handle_inter_mode(cpi, x, block_size,
                                   &saddone, near_sadidx, mdcounts, txfm_cache,
                                   &rate2, &distortion2, &skippable,
                                   &compmode_cost,
@@ -5021,14 +5434,41 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
   }
 
 end:
-  store_coding_context(x, &x->sb_context[0], best_mode_index, NULL,
-                       &mbmi->ref_mvs[mbmi->ref_frame][0],
-                       &mbmi->ref_mvs[mbmi->second_ref_frame < 0
-                                      ? 0 : mbmi->second_ref_frame][0],
-                       best_pred_diff, best_txfm_diff);
+  {
+#if CONFIG_SUPERBLOCKS64
+    PICK_MODE_CONTEXT *p = (block_size == BLOCK_32X32) ?
+                           &x->sb32_context[xd->sb_index] :
+                           &x->sb64_context;
+#else
+    PICK_MODE_CONTEXT *p = &x->sb32_context[xd->sb_index];
+#endif
+    store_coding_context(x, p, best_mode_index, NULL,
+                         &mbmi->ref_mvs[mbmi->ref_frame][0],
+                         &mbmi->ref_mvs[mbmi->second_ref_frame < 0 ? 0 :
+                                        mbmi->second_ref_frame][0],
+                         best_pred_diff, best_txfm_diff);
+  }
 
   return best_rd;
 }
+
+int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
+                                    int recon_yoffset, int recon_uvoffset,
+                                    int *returnrate,
+                                    int *returndistortion) {
+  return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset,
+                                   returnrate, returndistortion, BLOCK_32X32);
+}
+
+#if CONFIG_SUPERBLOCKS64
+int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
+                                    int recon_yoffset, int recon_uvoffset,
+                                    int *returnrate,
+                                    int *returndistortion) {
+  return vp9_rd_pick_inter_mode_sb(cpi, x, recon_yoffset, recon_uvoffset,
+                                   returnrate, returndistortion, BLOCK_64X64);
+}
+#endif  // CONFIG_SUPERBLOCKS64
 #endif
 
 void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
@@ -5063,8 +5503,8 @@ void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
     // vp9_pick_inter_mode
 
     // Store metrics so they can be added in to totals if this mode is picked
-    x->mb_context[xd->mb_index].distortion = distortion;
-    x->mb_context[xd->mb_index].intra_error = intra_error;
+    x->mb_context[xd->sb_index][xd->mb_index].distortion = distortion;
+    x->mb_context[xd->sb_index][xd->mb_index].intra_error = intra_error;
 
     *totalrate = rate;
     *totaldist = distortion;
diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h
index 4c2c33a..8ee2c0b 100644
--- a/vp9/encoder/vp9_rdopt.h
+++ b/vp9/encoder/vp9_rdopt.h
@@ -22,16 +22,23 @@ extern void vp9_initialize_me_consts(VP9_COMP *cpi, int QIndex);
 extern void vp9_rd_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                    int *r, int *d);
 
-extern void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
-                                      int *r, int *d);
+extern void vp9_rd_pick_intra_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
+                                        int *r, int *d);
+
+extern void vp9_rd_pick_intra_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
+                                        int *r, int *d);
 
 extern void vp9_pick_mode_inter_macroblock(VP9_COMP *cpi, MACROBLOCK *x,
-                                           int recon_yoffset,
-                                           int recon_uvoffset, int *r, int *d);
+                                           int ref_yoffset, int ref_uvoffset,
+                                           int *r, int *d);
+
+extern int64_t vp9_rd_pick_inter_mode_sb32(VP9_COMP *cpi, MACROBLOCK *x,
+                                           int ref_yoffset, int ref_uvoffset,
+                                           int *r, int *d);
 
-extern int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
-                                         int recon_yoffset, int recon_uvoffset,
-                                         int *returnrate, int *returndist);
+extern int64_t vp9_rd_pick_inter_mode_sb64(VP9_COMP *cpi, MACROBLOCK *x,
+                                           int ref_yoffset, int ref_uvoffset,
+                                           int *r, int *d);
 
 extern void vp9_init_me_luts();
diff --git a/vp9/encoder/vp9_sad_c.c b/vp9/encoder/vp9_sad_c.c
index e5249e5..9ce27fb 100644
--- a/vp9/encoder/vp9_sad_c.c
+++ b/vp9/encoder/vp9_sad_c.c
@@ -14,6 +14,14 @@
 #include "vpx_ports/config.h"
 #include "vpx/vpx_integer.h"
 
+unsigned int vp9_sad64x64_c(const uint8_t *src_ptr,
+                            int src_stride,
+                            const uint8_t *ref_ptr,
+                            int ref_stride,
+                            int max_sad) {
+  return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 64, 64);
+}
+
 unsigned int vp9_sad32x32_c(const uint8_t *src_ptr,
                             int src_stride,
                             const uint8_t *ref_ptr,
@@ -64,6 +72,19 @@ unsigned int vp9_sad4x4_c(const uint8_t *src_ptr,
   return sad_mx_n_c(src_ptr, src_stride, ref_ptr, ref_stride, 4, 4);
 }
 
+void vp9_sad64x64x3_c(const uint8_t *src_ptr,
+                      int src_stride,
+                      const uint8_t *ref_ptr,
+                      int ref_stride,
+                      unsigned int *sad_array) {
+  sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr, ref_stride, 0x7fffffff);
+  sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr + 1, ref_stride, 0x7fffffff);
+  sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr + 2, ref_stride, 0x7fffffff);
+}
+
 void vp9_sad32x32x3_c(const uint8_t *src_ptr,
                       int src_stride,
                       const uint8_t *ref_ptr,
@@ -77,6 +98,37 @@ void vp9_sad32x32x3_c(const uint8_t *src_ptr,
                                 ref_ptr + 2, ref_stride, 0x7fffffff);
 }
 
+void vp9_sad64x64x8_c(const uint8_t *src_ptr,
+                      int src_stride,
+                      const uint8_t *ref_ptr,
+                      int ref_stride,
+                      uint16_t *sad_array) {
+  sad_array[0] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr, ref_stride,
+                                          0x7fffffff);
+  sad_array[1] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 1, ref_stride,
+                                          0x7fffffff);
+  sad_array[2] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 2, ref_stride,
+                                          0x7fffffff);
+  sad_array[3] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 3, ref_stride,
+                                          0x7fffffff);
+  sad_array[4] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 4, ref_stride,
+                                          0x7fffffff);
+  sad_array[5] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 5, ref_stride,
+                                          0x7fffffff);
+  sad_array[6] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 6, ref_stride,
+                                          0x7fffffff);
+  sad_array[7] = (uint16_t)vp9_sad64x64_c(src_ptr, src_stride,
+                                          ref_ptr + 7, ref_stride,
+                                          0x7fffffff);
+}
+
 void vp9_sad32x32x8_c(const uint8_t *src_ptr,
                       int src_stride,
                       const uint8_t *ref_ptr,
@@ -328,6 +380,21 @@ void vp9_sad4x4x8_c(const uint8_t *src_ptr,
                                         0x7fffffff);
 }
 
+void vp9_sad64x64x4d_c(const uint8_t *src_ptr,
+                       int src_stride,
+                       uint8_t *ref_ptr[],
+                       int ref_stride,
+                       unsigned int *sad_array) {
+  sad_array[0] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr[0], ref_stride, 0x7fffffff);
+  sad_array[1] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr[1], ref_stride, 0x7fffffff);
+  sad_array[2] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr[2], ref_stride, 0x7fffffff);
+  sad_array[3] = vp9_sad64x64_c(src_ptr, src_stride,
+                                ref_ptr[3], ref_stride, 0x7fffffff);
+}
+
 void vp9_sad32x32x4d_c(const uint8_t *src_ptr,
                        int src_stride,
                        uint8_t *ref_ptr[],
diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c
index ee90f4f..19529fc 100644
--- a/vp9/encoder/vp9_segmentation.c
+++ b/vp9/encoder/vp9_segmentation.c
@@ -141,21 +141,57 @@ static int cost_segmap(MACROBLOCKD *xd,
          segcounts[3] * vp9_cost_one(probs[2]);
 
   return cost;
+}
+
+static void count_segs(VP9_COMP *cpi,
+                       MODE_INFO *mi,
+                       int *no_pred_segcounts,
+                       int (*temporal_predictor_count)[2],
+                       int *t_unpred_seg_counts,
+                       int mb_size, int mb_row, int mb_col) {
+  VP9_COMMON *const cm = &cpi->common;
+  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
+  const int segmap_index = mb_row * cm->mb_cols + mb_col;
+  const int segment_id = mi->mbmi.segment_id;
+
+  xd->mode_info_context = mi;
+  xd->mb_to_top_edge = -((mb_row * 16) << 3);
+  xd->mb_to_left_edge = -((mb_col * 16) << 3);
+  xd->mb_to_bottom_edge = ((cm->mb_rows - mb_size - mb_row) * 16) << 3;
+  xd->mb_to_right_edge = ((cm->mb_cols - mb_size - mb_col) * 16) << 3;
+
+  // Count the number of hits on each segment with no prediction
+  no_pred_segcounts[segment_id]++;
+
+  // Temporal prediction not allowed on key frames
+  if (cm->frame_type != KEY_FRAME) {
+    // Test to see if the segment id matches the predicted value.
+    const int seg_predicted =
+        (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index));
+    // Get the segment id prediction context
+    const int pred_context = vp9_get_pred_context(cm, xd, PRED_SEG_ID);
+
+    // Store the prediction status for this mb and update counts
+    // as appropriate
+    vp9_set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
+    temporal_predictor_count[pred_context][seg_predicted]++;
+
+    if (!seg_predicted)
+      // Update the "unpredicted" segment count
+      t_unpred_seg_counts[segment_id]++;
+  }
 }
 
 void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &cpi->mb.e_mbd;
 
-  int i;
   int no_pred_cost;
   int t_pred_cost = INT_MAX;
-  int pred_context;
 
+  int i;
   int mb_row, mb_col;
-  int segmap_index = 0;
-  unsigned char segment_id;
 
   int temporal_predictor_count[PREDICTION_PROBS][2];
   int no_pred_segcounts[MAX_MB_SEGMENTS];
@@ -165,9 +201,8 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
   vp9_prob t_pred_tree[MB_FEATURE_TREE_PROBS];
   vp9_prob t_nopred_prob[PREDICTION_PROBS];
 
-#if CONFIG_SUPERBLOCKS
   const int mis = cm->mode_info_stride;
-#endif
+  MODE_INFO *mi_ptr = cm->mi, *mi;
 
   // Set default state for the segment tree probabilities and the
   // temporal coding probabilities
@@ -183,87 +218,57 @@ void vp9_choose_segmap_coding_method(VP9_COMP *cpi) {
   // First of all generate stats regarding how well the last segment map
   // predicts this one
 
-  // Initialize macroblock decoder mode info context for the first mb
-  // in the frame
-  xd->mode_info_context = cm->mi;
-
-  for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 2) {
-    for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 2) {
-      for (i = 0; i < 4; i++) {
-        static const int dx[4] = { +1, -1, +1, +1 };
-        static const int dy[4] = { 0, +1, 0, -1 };
-        int x_idx = i & 1, y_idx = i >> 1;
-
-        if (mb_col + x_idx >= cm->mb_cols ||
-            mb_row + y_idx >= cm->mb_rows) {
-          goto end;
-        }
-
-        xd->mb_to_top_edge = -((mb_row * 16) << 3);
-        xd->mb_to_left_edge = -((mb_col * 16) << 3);
-
-        segmap_index = (mb_row + y_idx) * cm->mb_cols + mb_col + x_idx;
-        segment_id = xd->mode_info_context->mbmi.segment_id;
-#if CONFIG_SUPERBLOCKS
-        if (xd->mode_info_context->mbmi.encoded_as_sb) {
-          if (mb_col + 1 < cm->mb_cols)
-            segment_id = segment_id &&
-                         xd->mode_info_context[1].mbmi.segment_id;
-          if (mb_row + 1 < cm->mb_rows) {
-            segment_id = segment_id &&
-                         xd->mode_info_context[mis].mbmi.segment_id;
-            if (mb_col + 1 < cm->mb_cols)
-              segment_id = segment_id &&
-                           xd->mode_info_context[mis + 1].mbmi.segment_id;
-          }
-          xd->mb_to_bottom_edge = ((cm->mb_rows - 2 - mb_row) * 16) << 3;
-          xd->mb_to_right_edge = ((cm->mb_cols - 2 - mb_col) * 16) << 3;
-        } else {
+  for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 4, mi_ptr += 4 * mis) {
+    mi = mi_ptr;
+    for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
+#if CONFIG_SUPERBLOCKS && CONFIG_SUPERBLOCKS64
+      if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
+        count_segs(cpi, mi, no_pred_segcounts, temporal_predictor_count,
+                   t_unpred_seg_counts, 4, mb_row, mb_col);
+      } else
 #endif
-          xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
-          xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
+      {
+        for (i = 0; i < 4; i++) {
+          int x_idx = (i & 1) << 1, y_idx = i & 2;
 #if CONFIG_SUPERBLOCKS
-        }
+          MODE_INFO *sb_mi = mi + y_idx * mis + x_idx;
 #endif
-        // Count the number of hits on each segment with no prediction
-        no_pred_segcounts[segment_id]++;
-
-        // Temporal prediction not allowed on key frames
-        if (cm->frame_type != KEY_FRAME) {
-          // Test to see if the segment id matches the predicted value.
-          int seg_predicted =
-              (segment_id == vp9_get_pred_mb_segid(cm, xd, segmap_index));
+          if (mb_col + x_idx >= cm->mb_cols ||
+              mb_row + y_idx >= cm->mb_rows) {
+            continue;
+          }
 
-          // Get the segment id prediction context
-          pred_context =
-              vp9_get_pred_context(cm, xd, PRED_SEG_ID);
+#if CONFIG_SUPERBLOCKS
+          if (sb_mi->mbmi.sb_type) {
+            assert(sb_mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
+            count_segs(cpi, sb_mi, no_pred_segcounts, temporal_predictor_count,
+                       t_unpred_seg_counts, 2, mb_row + y_idx, mb_col + x_idx);
+          } else
+#endif
+          {
+            int j;
 
-          // Store the prediction status for this mb and update counts
-          // as appropriate
-          vp9_set_pred_flag(xd, PRED_SEG_ID, seg_predicted);
-          temporal_predictor_count[pred_context][seg_predicted]++;
+            for (j = 0; j < 4; j++) {
+              const int x_idx_mb = x_idx + (j & 1), y_idx_mb = y_idx + (j >> 1);
+              MODE_INFO *mb_mi = mi + x_idx_mb + y_idx_mb * mis;
 
-          if (!seg_predicted)
-            // Update the "unpredicted" segment count
-            t_unpred_seg_counts[segment_id]++;
-        }
+              if (mb_col + x_idx_mb >= cm->mb_cols ||
+                  mb_row + y_idx_mb >= cm->mb_rows) {
+                continue;
+              }
 
 #if CONFIG_SUPERBLOCKS
-        if (xd->mode_info_context->mbmi.encoded_as_sb) {
-          assert(!i);
-          xd->mode_info_context += 2;
-          break;
-        }
+              assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
 #endif
-      end:
-        xd->mode_info_context += dx[i] + dy[i] * cm->mode_info_stride;
+              count_segs(cpi, mb_mi, no_pred_segcounts,
+                         temporal_predictor_count, t_unpred_seg_counts,
+                         1, mb_row + y_idx_mb, mb_col + x_idx_mb);
+            }
+          }
+        }
      }
    }
-
-    // this is to account for the border in mode_info_context
-    xd->mode_info_context -= mb_col;
-    xd->mode_info_context += cm->mode_info_stride * 2;
   }
 
   // Work out probability tree for coding segments without prediction
diff --git a/vp9/encoder/vp9_variance_c.c b/vp9/encoder/vp9_variance_c.c
index ecb9257..9060d4c 100644
--- a/vp9/encoder/vp9_variance_c.c
+++ b/vp9/encoder/vp9_variance_c.c
@@ -25,6 +25,19 @@ unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
 }
 
 #if CONFIG_SUPERBLOCKS
+unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
+                                 int source_stride,
+                                 const uint8_t *ref_ptr,
+                                 int recon_stride,
+                                 unsigned int *sse) {
+  unsigned int var;
+  int avg;
+
+  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
+  *sse = var;
+  return (var - (((int64_t)avg * avg) >> 12));
+}
+
 unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                  int source_stride,
                                  const uint8_t *ref_ptr,
@@ -185,6 +198,27 @@ unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
 }
 
 #if CONFIG_SUPERBLOCKS
+unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
+                                           int src_pixels_per_line,
+                                           int xoffset,
+                                           int yoffset,
+                                           const uint8_t *dst_ptr,
+                                           int dst_pixels_per_line,
+                                           unsigned int *sse) {
+  uint16_t FData3[65 * 64];  // Temp data buffer used in filtering
+  uint8_t temp2[68 * 64];
+  const int16_t *HFilter, *VFilter;
+
+  HFilter = vp9_bilinear_filters[xoffset];
+  VFilter = vp9_bilinear_filters[yoffset];
+
+  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
+                                    1, 65, 64, HFilter);
+  var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter);
+
+  return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
+}
+
 unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                            int src_pixels_per_line,
                                            int xoffset,
@@ -224,6 +258,15 @@ unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
   return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
                                        ref_ptr, recon_stride, sse);
 }
+
+unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
+                                              int source_stride,
+                                              const uint8_t *ref_ptr,
+                                              int recon_stride,
+                                              unsigned int *sse) {
+  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
+                                       ref_ptr, recon_stride, sse);
+}
 #endif
 
@@ -245,6 +288,15 @@ unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
   return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
                                        ref_ptr, recon_stride, sse);
 }
+
+unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
+                                              int source_stride,
+                                              const uint8_t *ref_ptr,
+                                              int recon_stride,
+                                              unsigned int *sse) {
+  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
+                                       ref_ptr, recon_stride, sse);
+}
 #endif
 
 unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
@@ -265,6 +317,15 @@ unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
   return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
                                        ref_ptr, recon_stride, sse);
 }
+
+unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
+                                               int source_stride,
+                                               const uint8_t *ref_ptr,
+                                               int recon_stride,
+                                               unsigned int *sse) {
+  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
+                                       ref_ptr, recon_stride, sse);
+}
 #endif
 
 unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
@@ -293,6 +354,19 @@ unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                 dst_pixels_per_line, sse);
   return *sse;
 }
+
+unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
+                                      int src_pixels_per_line,
+                                      int xoffset,
+                                      int yoffset,
+                                      const uint8_t *dst_ptr,
+                                      int dst_pixels_per_line,
+                                      unsigned int *sse) {
+  vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
+                                xoffset, yoffset, dst_ptr,
+                                dst_pixels_per_line, sse);
+  return *sse;
+}
 #endif
 
 unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
-- 
2.7.4
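
A note on the normalization in the new vp9_variance64x64_c above: variance() accumulates the sum of squared pixel differences in "var" and the raw sum of differences in "avg", and the variance is SSE minus sum^2 / N. A 64x64 block has N = 4096 = 2^12 samples, so the division reduces to the ">> 12" shift in the return statement. The following standalone sketch reproduces that computation with illustrative names; it is not code from the patch:

#include <stdint.h>

static unsigned int variance64x64_sketch(const uint8_t *src, int src_stride,
                                         const uint8_t *ref, int ref_stride,
                                         unsigned int *sse) {
  int64_t sum = 0;       /* sum of pixel differences ("avg" in the patch) */
  uint32_t sum_sq = 0;   /* sum of squared differences ("var" in the patch) */
  int i, j;

  for (i = 0; i < 64; i++) {
    for (j = 0; j < 64; j++) {
      const int diff = src[i * src_stride + j] - ref[i * ref_stride + j];
      sum += diff;
      sum_sq += diff * diff;
    }
  }
  *sse = sum_sq;
  /* 64 * 64 = 4096 = 2^12 samples, so sum^2 / 4096 == (sum * sum) >> 12 */
  return (unsigned int)(sum_sq - ((sum * sum) >> 12));
}

The smaller block sizes follow the same pattern, shifting by the log2 of their own sample count.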
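The rewritten scan in vp9_choose_segmap_coding_method() replaces the old dx[]/dy[] traversal with a regular walk in 64x64 steps, descending to 32x32 quadrants and then to individual 16x16 macroblocks so that count_segs() runs exactly once per coded block. Below is a standalone sketch of that descent with simplified stand-ins for the real vp9 structs (illustrative only, not code from the patch):

#include <stdio.h>

enum { MB16X16 = 0, SB32X32 = 1, SB64X64 = 2 };  /* cf. BLOCK_SIZE_TYPE */

/* sb_type is sampled at the top-left macroblock of each candidate block,
 * mirroring how the patch reads mi->mbmi.sb_type. */
static void scan_blocks(const int *sb_type, int mb_rows, int mb_cols) {
  int mb_row, mb_col, i, j;

  for (mb_row = 0; mb_row < mb_rows; mb_row += 4) {
    for (mb_col = 0; mb_col < mb_cols; mb_col += 4) {
      if (sb_type[mb_row * mb_cols + mb_col] == SB64X64) {
        printf("64x64 block at MB (%d,%d)\n", mb_row, mb_col);
        continue;
      }
      for (i = 0; i < 4; i++) {  /* four 32x32 quadrants */
        const int r32 = mb_row + (i & 2), c32 = mb_col + ((i & 1) << 1);
        if (r32 >= mb_rows || c32 >= mb_cols)
          continue;
        if (sb_type[r32 * mb_cols + c32] == SB32X32) {
          printf("32x32 block at MB (%d,%d)\n", r32, c32);
          continue;
        }
        for (j = 0; j < 4; j++) {  /* four 16x16 macroblocks */
          const int r16 = r32 + (j >> 1), c16 = c32 + (j & 1);
          if (r16 >= mb_rows || c16 >= mb_cols)
            continue;
          printf("16x16 block at MB (%d,%d)\n", r16, c16);
        }
      }
    }
  }
}

int main(void) {
  /* One 64x64 step on a 4x4-MB frame: the top-left quadrant is a 32x32
   * superblock, everything else is coded as 16x16 macroblocks. */
  int sb_type[4 * 4] = { SB32X32 };
  scan_blocks(sb_type, 4, 4);
  return 0;
}

The bound checks mirror the patch's handling of partial superblocks at the right and bottom frame edges, where a 64x64 or 32x32 step can extend past the last macroblock row or column.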