From 1f80a568d24f91125ba10d9b7f6c543ab9b8700d Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Tue, 30 Apr 2013 09:54:51 -0700 Subject: [PATCH] Make vp9_optimize_sb* common Unify the various vp9_optimize_sb functions into one that handles all transform sizes. Change-Id: I48b642fbfb3e72cc2e0bcf1d0317a80a80547882 --- vp9/common/vp9_blockd.h | 68 ++++++++++ vp9/decoder/vp9_decodframe.c | 15 --- vp9/encoder/vp9_encodeframe.c | 24 ++-- vp9/encoder/vp9_encodeintra.c | 10 +- vp9/encoder/vp9_encodemb.c | 290 ++++++++++++------------------------------ vp9/encoder/vp9_encodemb.h | 21 +-- 6 files changed, 173 insertions(+), 255 deletions(-) diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 6c18387..9662804 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -1014,6 +1014,74 @@ static uint8_t* raster_block_offset_uint8(MACROBLOCKD *xd, return base + raster_block_offset(xd, bsize, plane, block, stride); } +static int txfrm_block_to_raster_block(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + int ss_txfrm_size) { + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int txwl = ss_txfrm_size / 2; + const int tx_cols_lg2 = bwl - txwl; + const int tx_cols = 1 << tx_cols_lg2; + const int raster_mb = block >> ss_txfrm_size; + const int x = (raster_mb & (tx_cols - 1)) << (txwl); + const int y = raster_mb >> tx_cols_lg2 << (txwl); + return x + (y << bwl); +} + +static void txfrm_block_to_raster_xy(MACROBLOCKD *xd, + BLOCK_SIZE_TYPE bsize, + int plane, int block, + int ss_txfrm_size, + int *x, int *y) { + const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; + const int txwl = ss_txfrm_size / 2; + const int tx_cols_lg2 = bwl - txwl; + const int tx_cols = 1 << tx_cols_lg2; + const int raster_mb = block >> ss_txfrm_size; + *x = (raster_mb & (tx_cols - 1)) << (txwl); + *y = raster_mb >> tx_cols_lg2 << (txwl); +} + +static TX_SIZE tx_size_for_plane(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + int plane) { + // TODO(jkoleszar): This duplicates a ton of code, but we're going to be + // moving this to a per-plane lookup shortly, and this will go away then. + if (!plane) { + return xd->mode_info_context->mbmi.txfm_size; + } else { + const int bw = b_width_log2(bsize), bh = b_height_log2(bsize); +#if !CONFIG_SB8X8 + const MB_PREDICTION_MODE mode = xd->mode_info_context->mbmi.mode; + const int is_split = + xd->mode_info_context->mbmi.txfm_size == TX_8X8 && + (mode == I8X8_PRED || mode == SPLITMV); +#endif + + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + const TX_SIZE tx_size = xd->mode_info_context->mbmi.txfm_size; + const int block_size_b = bw + bh; + const int txfrm_size_b = tx_size * 2; + + // subsampled size of the block + const int ss_sum = xd->plane[plane].subsampling_x + + xd->plane[plane].subsampling_y; + const int ss_block_size = block_size_b - ss_sum; + + // size of the transform to use. scale the transform down if it's larger + // than the size of the subsampled data, or forced externally by the mb mode + const int ss_max = MAX(xd->plane[plane].subsampling_x, + xd->plane[plane].subsampling_y); + const int ss_txfrm_size = txfrm_size_b > ss_block_size +#if !CONFIG_SB8X8 + || is_split +#endif // !CONFIG_SB8X8 + ? txfrm_size_b - ss_max * 2 + : txfrm_size_b; + return (TX_SIZE)(ss_txfrm_size / 2); + } +} + #if CONFIG_CODE_ZEROGROUP static int get_zpc_used(TX_SIZE tx_size) { return (tx_size >= TX_16X16); diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 12f8948..e16fee4 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -358,21 +358,6 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_reader *r) { } #endif -static int txfrm_block_to_raster_block(MACROBLOCKD *xd, - BLOCK_SIZE_TYPE bsize, - int plane, int block, - int ss_txfrm_size) { - const int bwl = b_width_log2(bsize) - xd->plane[plane].subsampling_x; - const int txwl = ss_txfrm_size / 2; - const int tx_cols_lg2 = bwl - txwl; - const int tx_cols = 1 << tx_cols_lg2; - const int raster_mb = block >> ss_txfrm_size; - const int x = (raster_mb & (tx_cols - 1)) << (txwl); - const int y = raster_mb >> tx_cols_lg2 << (txwl); - return x + (y << bwl); -} - - static void decode_block(int plane, int block, BLOCK_SIZE_TYPE bsize, int ss_txfrm_size, void *arg) { MACROBLOCKD* const xd = arg; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 8fc1448..87b5bd5 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2443,7 +2443,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_subtract_sbuv(x, bsize); vp9_transform_sbuv_4x4(x, bsize); vp9_quantize_sbuv_4x4(x, bsize); - vp9_optimize_sbuv_4x4(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); vp9_inverse_transform_sbuv_4x4(xd, bsize); vp9_recon_sbuv(xd, bsize); @@ -2507,11 +2507,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sbuv_16x16(x, bsize); } if (x->optimize) { - vp9_optimize_sby_32x32(cm, x, bsize); + vp9_optimize_sby(cm, x, bsize); if (bsize == BLOCK_SIZE_SB64X64) - vp9_optimize_sbuv_32x32(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); else - vp9_optimize_sbuv_16x16(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); } vp9_inverse_transform_sby_32x32(xd, bsize); if (bsize == BLOCK_SIZE_SB64X64) @@ -2530,11 +2530,11 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sbuv_8x8(x, bsize); } if (x->optimize) { - vp9_optimize_sby_16x16(cm, x, bsize); + vp9_optimize_sby(cm, x, bsize); if (bsize >= BLOCK_SIZE_SB32X32) - vp9_optimize_sbuv_16x16(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); else - vp9_optimize_sbuv_8x8(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); } vp9_inverse_transform_sby_16x16(xd, bsize); if (bsize >= BLOCK_SIZE_SB32X32) @@ -2546,19 +2546,19 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_transform_sby_8x8(x, bsize); vp9_quantize_sby_8x8(x, bsize); if (x->optimize) - vp9_optimize_sby_8x8(cm, x, bsize); + vp9_optimize_sby(cm, x, bsize); vp9_inverse_transform_sby_8x8(xd, bsize); if (bsize >= BLOCK_SIZE_MB16X16) { vp9_transform_sbuv_8x8(x, bsize); vp9_quantize_sbuv_8x8(x, bsize); if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); vp9_inverse_transform_sbuv_8x8(xd, bsize); } else { vp9_transform_sbuv_4x4(x, bsize); vp9_quantize_sbuv_4x4(x, bsize); if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); vp9_inverse_transform_sbuv_4x4(xd, bsize); } break; @@ -2568,8 +2568,8 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, vp9_quantize_sby_4x4(x, bsize); vp9_quantize_sbuv_4x4(x, bsize); if (x->optimize) { - vp9_optimize_sby_4x4(cm, x, bsize); - vp9_optimize_sbuv_4x4(cm, x, bsize); + vp9_optimize_sby(cm, x, bsize); + vp9_optimize_sbuv(cm, x, bsize); } vp9_inverse_transform_sby_4x4(xd, bsize); vp9_inverse_transform_sbuv_4x4(xd, bsize); diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index 58a6b2a..c5f29fe 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -114,21 +114,21 @@ void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_transform_sby_16x16(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); break; case TX_8X8: vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); break; default: vp9_transform_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); break; } @@ -148,14 +148,14 @@ void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); break; default: // 16x16 or 8x8 vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); break; } diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 5e7437e..15fd4f1 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -249,57 +249,53 @@ static int trellis_get_coeff_context(const int *scan, return pt; } -static void optimize_b(VP9_COMMON *const cm, - MACROBLOCK *mb, int ib, PLANE_TYPE type, - const int16_t *dequant_ptr, +static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb, + int plane, int block, BLOCK_SIZE_TYPE bsize, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, - int tx_size, int y_blocks) { + TX_SIZE tx_size) { const int ref = mb->e_mbd.mode_info_context->mbmi.ref_frame != INTRA_FRAME; MACROBLOCKD *const xd = &mb->e_mbd; vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; - const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); - const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, - pb_idx.block, 16); + const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[plane].coeff, + block, 16); int16_t *qcoeff_ptr; int16_t *dqcoeff_ptr; - int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0; + int eob = xd->plane[plane].eobs[block], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; int64_t rdmult, rddiv, rd_cost0, rd_cost1; int rate0, rate1, error0, error1, t0, t1; int best, band, pt; + PLANE_TYPE type = xd->plane[plane].plane_type; int err_mult = plane_rd_mult[type]; int default_eob, pad; int const *scan, *nb; const int mul = 1 + (tx_size == TX_32X32); uint8_t token_cache[1024]; + const int ib = txfrm_block_to_raster_block(xd, bsize, plane, + block, 2 * tx_size); + const int16_t *dequant_ptr = xd->plane[plane].dequant; - assert((!type && !pb_idx.plane) || (type && pb_idx.plane)); - dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16); - qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16); + assert((!type && !plane) || (type && plane)); + dqcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block, 16); + qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16); switch (tx_size) { default: case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(xd, ib); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_4x4(xd, ib) : DCT_DCT; default_eob = 16; scan = get_scan_4x4(tx_type); break; } case TX_8X8: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 1 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_8x8(xd, y + (x >> 1)); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_8x8(xd, ib) : DCT_DCT; scan = get_scan_8x8(tx_type); default_eob = 64; break; } case TX_16X16: { - const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; - const int sz = 2 + b_width_log2(sb_type); - const int x = ib & ((1 << sz) - 1), y = ib - x; - const TX_TYPE tx_type = get_tx_type_16x16(xd, y + (x >> 2)); + const TX_TYPE tx_type = plane == 0 ? get_tx_type_16x16(xd, ib) : DCT_DCT; scan = get_scan_16x16(tx_type); default_eob = 256; break; @@ -480,201 +476,81 @@ static void optimize_b(VP9_COMMON *const cm, } final_eob++; - xd->plane[pb_idx.plane].eobs[pb_idx.block] = final_eob; + xd->plane[plane].eobs[block] = final_eob; *a = *l = (final_eob > 0); } -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 3, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 3); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - for (n = 0; n < bw; n++, a += 8) - ta[n] = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0; - for (n = 0; n < bh; n++, l += 8) - tl[n] = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh); - } -} - -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 2, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 2); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - for (n = 0; n < bw; n++, a += 4) - ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; - for (n = 0; n < bh; n++, l += 4) - tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh); - } -} - -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT *a = xd->plane[0].above_context; - ENTROPY_CONTEXT *l = xd->plane[0].left_context; - const int bwl = b_width_log2(bsize) - 1, bw = 1 << bwl; - const int bh = 1 << (b_height_log2(bsize) - 1); - ENTROPY_CONTEXT ta[8], tl[8]; - int n; - - for (n = 0; n < bw; n++, a += 2) - ta[n] = (a[0] + a[1]) != 0; - for (n = 0; n < bh; n++, l += 2) - tl[n] = (l[0] + l[1]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh); - } -} - -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - int bwl = b_width_log2(bsize), bw = 1 << bwl; - int bh = 1 << b_height_log2(bsize); - ENTROPY_CONTEXT ta[16], tl[16]; - int n; - - vpx_memcpy(ta, xd->plane[0].above_context, sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(tl, xd->plane[0].left_context, sizeof(ENTROPY_CONTEXT) * bh); +struct optimize_ctx { + ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; + ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; +}; - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> bwl; +struct optimize_block_args { + VP9_COMMON *cm; + MACROBLOCK *x; + struct optimize_ctx *ctx; +}; - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.plane[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, bh * bw); - } -} +static void optimize_block(int plane, int block, BLOCK_SIZE_TYPE bsize, + int ss_txfrm_size, void *arg) { + const struct optimize_block_args* const args = arg; + MACROBLOCKD* const xd = &args->x->e_mbd; + int x, y; -void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - int b; + // find current entropy context + txfrm_block_to_raster_xy(xd, bsize, plane, block, ss_txfrm_size, &x, &y); - assert(bsize == BLOCK_SIZE_SB64X64); - for (b = 256; b < 384; b += 64) { - const int plane = 1 + (b >= 320); - ENTROPY_CONTEXT *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT *l = xd->plane[plane].left_context; - ENTROPY_CONTEXT a_ec, l_ec; - - a_ec = (a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + a[6] + a[7]) != 0; - l_ec = (l[0] + l[1] + l[2] + l[3] + l[4] + l[5] + l[6] + l[7]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.plane[plane].dequant, - &a_ec, &l_ec, TX_32X32, 256); - } + optimize_b(args->cm, args->x, plane, block, bsize, + &args->ctx->ta[plane][x], &args->ctx->tl[plane][y], + ss_txfrm_size / 2); } -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize) - 2, bhl = b_height_log2(bsize) - 2; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 16 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[2], *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT tl[2], *l = xd->plane[plane].left_context; - - for (n = 0; n < bw; n++, a += 4) - ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; - for (n = 0; n < bh; n++, l += 4) - tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_16X16, bh * bw * 64); +void vp9_optimize_init(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + struct optimize_ctx *ctx) { + int p; + + for (p = 0; p < MAX_MB_PLANE; p++) { + const struct macroblockd_plane* const plane = &xd->plane[p]; + const int bwl = b_width_log2(bsize) - plane->subsampling_x; + const int bhl = b_height_log2(bsize) - plane->subsampling_y; + const TX_SIZE tx_size = tx_size_for_plane(xd, bsize, p); + int i, j; + + for (i = 0; i < 1 << bwl; i += 1 << tx_size) { + int c = 0; + ctx->ta[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->ta[p][i] |= plane->above_context[i + j]; + } + } + for (i = 0; i < 1 << bhl; i += 1 << tx_size) { + int c = 0; + ctx->tl[p][i] = 0; + for (j = 0; j < 1 << tx_size && !c; j++) { + c = ctx->tl[p][i] |= plane->left_context[i + j]; + } } - uvoff = (uvoff * 5) >> 2; // switch u -> v } } -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize) - 1, bhl = b_height_log2(bsize) - 1; - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 4 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[4], *a = xd->plane[plane].above_context; - ENTROPY_CONTEXT tl[4], *l = xd->plane[plane].left_context; - - for (n = 0; n < bw; n++, a += 2) - ta[n] = (a[0] + a[1]) != 0; - for (n = 0; n < bh; n++, l += 2) - tl[n] = (l[0] + l[1]) != 0; - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_8X8, bh * bw * 16); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v - } +void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, +#if !CONFIG_SB8X8 + 0, +#endif + optimize_block, &arg); } -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize) { - MACROBLOCKD *const xd = &x->e_mbd; - const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); - const int bw = 1 << (bwl - 1); - const int bh = 1 << (bhl - 1); - int uvoff = 1 << (bwl + bhl); - int plane, n; - - for (plane = 1; plane < MAX_MB_PLANE; plane++) { - ENTROPY_CONTEXT ta[8], tl[8]; - - vpx_memcpy(ta, xd->plane[plane].above_context, - sizeof(ENTROPY_CONTEXT) * bw); - vpx_memcpy(tl, xd->plane[plane].left_context, - sizeof(ENTROPY_CONTEXT) * bh); - - for (n = 0; n < bw * bh; n++) { - const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV, - x->e_mbd.plane[plane].dequant, - &ta[x_idx], &tl[y_idx], - TX_4X4, bh * bw * 4); - } - uvoff = (uvoff * 5) >> 2; // switch u -> v - } +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + struct optimize_ctx ctx; + struct optimize_block_args arg = {cm, x, &ctx}; + vp9_optimize_init(&x->e_mbd, bsize, &ctx); + foreach_transformed_block_uv(&x->e_mbd, bsize, optimize_block, &arg); } #if !CONFIG_SB8X8 @@ -688,8 +564,8 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_quantize_sby_16x16(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) { - vp9_optimize_sby_16x16(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); } vp9_inverse_transform_sby_16x16(xd, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); @@ -697,20 +573,20 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_transform_sby_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sby_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sby_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sby_8x8(xd, BLOCK_SIZE_MB16X16); if (xd->mode_info_context->mbmi.mode == SPLITMV) { assert(xd->mode_info_context->mbmi.partitioning != PARTITIONING_4X4); vp9_transform_sbuv_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); } else { vp9_transform_sbuv_8x8(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_8x8(x, BLOCK_SIZE_MB16X16); if (x->optimize) - vp9_optimize_sbuv_8x8(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_8x8(xd, BLOCK_SIZE_MB16X16); } } else { @@ -719,8 +595,8 @@ void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_quantize_sby_4x4(x, BLOCK_SIZE_MB16X16); vp9_quantize_sbuv_4x4(x, BLOCK_SIZE_MB16X16); if (x->optimize) { - vp9_optimize_sby_4x4(cm, x, BLOCK_SIZE_MB16X16); - vp9_optimize_sbuv_4x4(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sby(cm, x, BLOCK_SIZE_MB16X16); + vp9_optimize_sbuv(cm, x, BLOCK_SIZE_MB16X16); } vp9_inverse_transform_sby_4x4(xd, BLOCK_SIZE_MB16X16); vp9_inverse_transform_sbuv_4x4(xd, BLOCK_SIZE_MB16X16); diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index cd20659..b1d8771 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -32,29 +32,18 @@ void vp9_encode_inter16x16(VP9_COMMON *const cm, MACROBLOCK *x, void vp9_encode_inter16x16y(MACROBLOCK *x, int mb_row, int mb_col); void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE_TYPE bsize); + +void vp9_optimize_sby(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); #if !CONFIG_SB8X8 void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); -- 2.7.4