From a3874850dd615064719a0c6cd4717d3d656628a3 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje"
Date: Tue, 9 Apr 2013 21:28:27 -0700
Subject: [PATCH] Make SB coding size-independent.

Merge sb32x32 and sb64x64 functions; allow for rectangular sizes. Code
gives identical encoder results before and after. There are a few
macros for rectangular block sizes under the sbsegment experiment; this
experiment is not yet functional and should not yet be used.

Change-Id: I71f93b5d2a1596e99a6f01f29c3f0a456694d728
---
 configure                     |   1 +
 vp9/common/vp9_blockd.h       |  70 +++-
 vp9/common/vp9_entropy.h      |  14 +-
 vp9/common/vp9_enums.h        |  30 ++
 vp9/common/vp9_invtrans.c     | 210 +++++-------
 vp9/common/vp9_invtrans.h    |  24 +-
 vp9/common/vp9_recon.c        |  45 +--
 vp9/common/vp9_rtcd_defs.sh   |  11 +-
 vp9/decoder/vp9_decodframe.c  |   4 +-
 vp9/encoder/vp9_block.h       |   8 +
 vp9/encoder/vp9_encodeframe.c | 633 +++++++++---------
 vp9/encoder/vp9_encodemb.c    | 721 +++++++++++++++---------------
 vp9/encoder/vp9_encodemb.h    |  67 ++--
 vp9/encoder/vp9_quantize.c    | 127 +++----
 vp9/encoder/vp9_quantize.h    |  24 +-
 vp9/encoder/vp9_rdopt.c       |  79 ++---
 vp9/encoder/vp9_tokenize.c    | 415 ++++------------
 vp9/encoder/vp9_tokenize.h    |  26 +-
 vp9/vp9_common.mk             |   1 +
 19 files changed, 843 insertions(+), 1667 deletions(-)
 create mode 100644 vp9/common/vp9_enums.h

diff --git a/configure b/configure
index 83c4c37..acd4e70 100755
--- a/configure
+++ b/configure
@@ -252,6 +252,7 @@ EXPERIMENT_LIST="
     implicit_compoundinter_weight
     scatterscan
     oneshotq
+    sbsegment
 "
 CONFIG_LIST="
     external_build
diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index a147ec7..d0c7707 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -19,6 +19,7 @@
 #include "vp9/common/vp9_treecoder.h"
 #include "vpx_ports/mem.h"
 #include "vp9/common/vp9_common.h"
+#include "vp9/common/vp9_enums.h"
 
 #define TRUE 1
 #define FALSE 0
@@ -198,11 +199,43 @@ typedef enum {
   MAX_REF_FRAMES = 4
 } MV_REFERENCE_FRAME;
 
-typedef enum {
-  BLOCK_SIZE_MB16X16 = 0,
-  BLOCK_SIZE_SB32X32 = 1,
-  BLOCK_SIZE_SB64X64 = 2,
-} BLOCK_SIZE_TYPE;
+static INLINE int mb_width_log2(BLOCK_SIZE_TYPE sb_type) {
+  switch (sb_type) {
+#if CONFIG_SBSEGMENT
+    case BLOCK_SIZE_SB16X32:
+#endif
+    case BLOCK_SIZE_MB16X16: return 0;
+#if CONFIG_SBSEGMENT
+    case BLOCK_SIZE_SB32X16:
+    case BLOCK_SIZE_SB32X64:
+#endif
+    case BLOCK_SIZE_SB32X32: return 1;
+#if CONFIG_SBSEGMENT
+    case BLOCK_SIZE_SB64X32:
+#endif
+    case BLOCK_SIZE_SB64X64: return 2;
+    default: assert(0);
+  }
+}
+
+static INLINE int mb_height_log2(BLOCK_SIZE_TYPE sb_type) {
+  switch (sb_type) {
+#if CONFIG_SBSEGMENT
+    case BLOCK_SIZE_SB32X16:
+#endif
+    case BLOCK_SIZE_MB16X16: return 0;
+#if CONFIG_SBSEGMENT
+    case BLOCK_SIZE_SB16X32:
+    case BLOCK_SIZE_SB64X32:
+#endif
+    case BLOCK_SIZE_SB32X32: return 1;
+#if CONFIG_SBSEGMENT
+    case BLOCK_SIZE_SB32X64:
+#endif
+    case BLOCK_SIZE_SB64X64: return 2;
+    default: assert(0);
+  }
+}
 
 typedef struct {
   MB_PREDICTION_MODE mode, uv_mode;
@@ -469,11 +502,12 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
   // is smaller than the prediction size
   TX_TYPE tx_type = DCT_DCT;
   const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+  const int wb = mb_width_log2(sb_type), hb = mb_height_log2(sb_type);
 #if !USE_ADST_FOR_SB
-  if (sb_type)
+  if (sb_type > BLOCK_SIZE_MB16X16)
     return tx_type;
 #endif
-  if (ib >= (16 << (2 * sb_type)))  // no chroma adst
+  if (ib >= (16 << (wb + hb)))  // no chroma adst
     return tx_type;
   if (xd->lossless)
     return DCT_DCT;
@@ -524,7 +558,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, int ib) {
       xd->q_index < ACTIVE_HT) {
 #if USE_ADST_FOR_I16X16_4X4
 #if USE_ADST_PERIPHERY_ONLY
-    const int hmax = 4 << sb_type;
+    const int hmax = 4 << wb;
     tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
 #if USE_ADST_FOR_REMOTE_EDGE
     if ((ib & (hmax - 1)) != 0 && ib >= hmax)
@@ -557,11 +591,12 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
   // is smaller than the prediction size
   TX_TYPE tx_type = DCT_DCT;
   const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+  const int wb = mb_width_log2(sb_type), hb = mb_height_log2(sb_type);
 #if !USE_ADST_FOR_SB
-  if (sb_type)
+  if (sb_type > BLOCK_SIZE_MB16X16)
     return tx_type;
 #endif
-  if (ib >= (16 << (2 * sb_type)))  // no chroma adst
+  if (ib >= (16 << (wb + hb)))  // no chroma adst
     return tx_type;
   if (xd->mode_info_context->mbmi.mode == I8X8_PRED &&
       xd->q_index < ACTIVE_HT8) {
@@ -574,7 +609,7 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
       xd->q_index < ACTIVE_HT8) {
 #if USE_ADST_FOR_I16X16_8X8
 #if USE_ADST_PERIPHERY_ONLY
-    const int hmax = 4 << sb_type;
+    const int hmax = 4 << wb;
     tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
 #if USE_ADST_FOR_REMOTE_EDGE
     if ((ib & (hmax - 1)) != 0 && ib >= hmax)
@@ -605,18 +640,19 @@ static TX_TYPE get_tx_type_8x8(const MACROBLOCKD *xd, int ib) {
 static TX_TYPE get_tx_type_16x16(const MACROBLOCKD *xd, int ib) {
   TX_TYPE tx_type = DCT_DCT;
   const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+  const int wb = mb_width_log2(sb_type), hb = mb_height_log2(sb_type);
 #if !USE_ADST_FOR_SB
-  if (sb_type)
+  if (sb_type > BLOCK_SIZE_MB16X16)
     return tx_type;
 #endif
-  if (ib >= (16 << (2 * sb_type)))
+  if (ib >= (16 << (wb + hb)))
     return tx_type;
   if (xd->mode_info_context->mbmi.mode < I8X8_PRED &&
       xd->q_index < ACTIVE_HT16) {
     tx_type = txfm_map(pred_mode_conv(xd->mode_info_context->mbmi.mode));
 #if USE_ADST_PERIPHERY_ONLY
-    if (sb_type) {
-      const int hmax = 4 << sb_type;
+    if (sb_type > BLOCK_SIZE_MB16X16) {
+      const int hmax = 4 << wb;
 #if USE_ADST_FOR_REMOTE_EDGE
       if ((ib & (hmax - 1)) != 0 && ib >= hmax)
         tx_type = DCT_DCT;
@@ -658,6 +694,10 @@ static TX_SIZE get_uv_tx_size(const MACROBLOCKD *xd) {
     switch (mbmi->sb_type) {
       case BLOCK_SIZE_SB64X64:
         return size;
+#if CONFIG_SBSEGMENT
+      case BLOCK_SIZE_SB64X32:
+      case BLOCK_SIZE_SB32X64:
+#endif
      case BLOCK_SIZE_SB32X32:
        if (size == TX_32X32)
          return TX_16X16;
diff --git a/vp9/common/vp9_entropy.h b/vp9/common/vp9_entropy.h
index 64f5950..d23f8c4 100644
--- a/vp9/common/vp9_entropy.h
+++ b/vp9/common/vp9_entropy.h
@@ -122,16 +122,12 @@ static INLINE void vp9_reset_mb_tokens_context(MACROBLOCKD* const xd) {
   vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
 }
 
-static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd) {
+static INLINE void vp9_reset_sb_tokens_context(MACROBLOCKD* const xd,
+                                               BLOCK_SIZE_TYPE bsize) {
   /* Clear entropy contexts */
-  vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
-  vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 2);
-}
-
-static INLINE void vp9_reset_sb64_tokens_context(MACROBLOCKD* const xd) {
-  /* Clear entropy contexts */
-  vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
-  vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * 4);
+  const int bw = 1 << mb_width_log2(bsize), bh = 1 << mb_height_log2(bsize);
+  vpx_memset(xd->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * bw);
+  vpx_memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * bh);
 }
 
 extern const int vp9_coef_bands8x8[64];
diff --git a/vp9/common/vp9_enums.h b/vp9/common/vp9_enums.h
new file mode 100644
index 0000000..efa84c4
--- /dev/null
+++ b/vp9/common/vp9_enums.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_ENUMS_H_
+#define VP9_COMMON_VP9_ENUMS_H_
+
+#include "./vpx_config.h"
+
+typedef enum BLOCK_SIZE_TYPE {
+  BLOCK_SIZE_MB16X16,
+#if CONFIG_SBSEGMENT
+  BLOCK_SIZE_SB16X32,
+  BLOCK_SIZE_SB32X16,
+#endif
+  BLOCK_SIZE_SB32X32,
+#if CONFIG_SBSEGMENT
+  BLOCK_SIZE_SB32X64,
+  BLOCK_SIZE_SB64X32,
+#endif
+  BLOCK_SIZE_SB64X64,
+} BLOCK_SIZE_TYPE;
+
+#endif  // VP9_COMMON_VP9_ENUMS_H_
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index 5175d63..88c931d 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -111,210 +111,150 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
   vp9_inverse_transform_mbuv_8x8(xd);
 }
 
-void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) {
-  vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), xd->diff, 64);
-}
-
-void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) - 1);
+  const int stride = 32 << bwl;
   int n;
 
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
-    const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
-
-    if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
-                                    xd->diff + x_idx * 16 + y_idx * 32 * 16,
-                                    64);
-    } else {
-      vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
-                         xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
-    }
-  }
-}
-
-void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
-  int n;
-
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
-    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
-
-    if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
-                                  xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
-    } else {
-      vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
-                       xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
-    }
-  }
-}
-
-void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
-  int n;
-
-  for (n = 0; n < 64; n++) {
-    const int x_idx = n & 7, y_idx = n >> 3;
-    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
-
-    if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
-                                  BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
-                                  xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
-    } else {
-      vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
-                       xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
-    }
-  }
-}
-
-void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
-  vp9_inverse_transform_b_16x16(xd->plane[1].dqcoeff,
-                                xd->diff + 1024, 32);
-  vp9_inverse_transform_b_16x16(xd->plane[2].dqcoeff,
-                                xd->diff + 1280, 32);
-}
-
-void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
-  int n;
-
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
-
-    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
-                                xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
-                                32);
-    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
-                                xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
-                                32);
-  }
-}
-
-void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
-  int n;
-
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
-
-    vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
-                                BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
-                                xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
-                                32);
-    vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
-                                BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
-                                xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
-                                32);
-  }
-}
-
-void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
-  int n;
-
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
     vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024),
-                        xd->diff + x_idx * 32 + y_idx * 32 * 64, 128);
+                        xd->diff + x_idx * 32 + y_idx * 32 * stride,
+                        stride * 2);
   }
 }
 
-void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
+  const int bh = 1 << mb_height_log2(bsize);
+  const int stride = 16 << bwl, bstride = 4 << bwl;
   int n;
 
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
-    const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+    const TX_TYPE tx_type = get_tx_type_16x16(xd,
+                                              (y_idx * bstride + x_idx) * 4);
 
     if (tx_type == DCT_DCT) {
       vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
-                                    xd->diff + x_idx * 16 + y_idx * 64 * 16,
-                                    128);
+                                    xd->diff + x_idx * 16 + y_idx * stride * 16,
+                                    stride * 2);
     } else {
       vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
-                         xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type);
+                         xd->diff + x_idx * 16 + y_idx * stride * 16,
+                         stride, tx_type);
     }
   }
 }
 
-void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) + 1);
+  const int stride = 8 << bwl, bstride = 2 << bwl;
   int n;
 
-  for (n = 0; n < 64; n++) {
-    const int x_idx = n & 7, y_idx = n >> 3;
-    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
 
     if (tx_type == DCT_DCT) {
       vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
-                                  xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
+                                  xd->diff + x_idx * 8 + y_idx * stride * 8,
+                                  stride * 2);
     } else {
       vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
-                       xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type);
+                       xd->diff + x_idx * 8 + y_idx * stride * 8,
+                       stride, tx_type);
     }
   }
 }
 
-void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) + 2);
+  const int stride = 4 << bwl, bstride = 1 << bwl;
   int n;
 
-  for (n = 0; n < 256; n++) {
-    const int x_idx = n & 15, y_idx = n >> 4;
-    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * bstride + x_idx);
 
     if (tx_type == DCT_DCT) {
       vp9_inverse_transform_b_4x4(xd, xd->plane[0].eobs[n],
                                   BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
-                                  xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
+                                  xd->diff + x_idx * 4 + y_idx * 4 * stride,
+                                  stride * 2);
    } else {
      vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
-                       xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type);
+                       xd->diff + x_idx * 4 + y_idx * 4 * stride,
+                       stride, tx_type);
    }
  }
 }
 
-void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  assert(bsize == BLOCK_SIZE_SB64X64);
+
   vp9_short_idct32x32(xd->plane[1].dqcoeff, xd->diff + 4096, 64);
   vp9_short_idct32x32(xd->plane[2].dqcoeff, xd->diff + 4096 + 1024, 64);
 }
 
-void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
+  const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
+  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
+  const int stride = 16 << (bwl - 1);
   int n;
 
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16;
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
+    const int off = x_idx * 16 + y_idx * stride * 16;
 
     vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256),
-                                  xd->diff + 4096 + off, 64);
+                                  xd->diff + uoff + off, stride * 2);
     vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256),
-                                  xd->diff + 4096 + 1024 + off, 64);
+                                  xd->diff + voff + off, stride * 2);
   }
 }
 
-void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1;
+  const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2;
+  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
+  const int stride = 8 << (bwl - 1);
   int n;
 
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8;
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
+    const int off = x_idx * 8 + y_idx * stride * 8;
 
     vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
-                                xd->diff + 4096 + off, 64);
+                                xd->diff + uoff + off, stride * 2);
     vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
-                                xd->diff + 4096 + 1024 + off, 64);
+                                xd->diff + voff + off, stride * 2);
   }
 }
 
-void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) {
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
+  const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2;
+  const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1);
+  const int stride = 4 << (bwl - 1);
   int n;
 
-  for (n = 0; n < 64; n++) {
-    const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4;
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1);
+    const int off = x_idx * 4 + y_idx * stride * 4;
 
     vp9_inverse_transform_b_4x4(xd, xd->plane[1].eobs[n],
                                 BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
-                                xd->diff + 4096 + off, 64);
+                                xd->diff + uoff + off, stride * 2);
     vp9_inverse_transform_b_4x4(xd, xd->plane[2].eobs[n],
                                 BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
-                                xd->diff + 4096 + 1024 + off, 64);
+                                xd->diff + voff + off, stride * 2);
   }
 }
diff --git a/vp9/common/vp9_invtrans.h b/vp9/common/vp9_invtrans.h
index 8991657..7a72218 100644
--- a/vp9/common/vp9_invtrans.h
+++ b/vp9/common/vp9_invtrans.h
@@ -41,21 +41,13 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd);
 
 void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd);
 
-void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd);
-void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd);
-void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd);
-void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd);
-void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd);
-void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd);
-void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd);
-
-void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd);
-void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd);
+void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sbuv_32x32(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
+void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize);
 
 #endif  // VP9_COMMON_VP9_INVTRANS_H_
diff --git a/vp9/common/vp9_recon.c b/vp9/common/vp9_recon.c
index 6815306..c32f860 100644
--- a/vp9/common/vp9_recon.c
+++ b/vp9/common/vp9_recon.c
@@ -84,56 +84,45 @@ void vp9_recon_mbuv_s_c(MACROBLOCKD *xd, uint8_t *udst, uint8_t *vdst) {
   }
 }
 
-static INLINE void recon_sby(MACROBLOCKD *mb, uint8_t *dst, int size) {
+void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst,
+                       BLOCK_SIZE_TYPE bsize) {
+  const int bw = 16 << mb_width_log2(bsize), bh = 16 << mb_height_log2(bsize);
   int x, y;
   const int stride = mb->block[0].dst_stride;
   const int16_t *diff = mb->diff;
 
-  for (y = 0; y < size; y++) {
-    for (x = 0; x < size; x++)
+  for (y = 0; y < bh; y++) {
+    for (x = 0; x < bw; x++)
       dst[x] = clip_pixel(dst[x] + diff[x]);
 
     dst += stride;
-    diff += size;
+    diff += bw;
   }
 }
 
-static INLINE void recon_sbuv(MACROBLOCKD *mb, uint8_t *u_dst, uint8_t *v_dst,
-                              int y_offset, int size) {
+void vp9_recon_sbuv_s_c(MACROBLOCKD *mb, uint8_t *u_dst, uint8_t *v_dst,
+                        BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
+  const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2;
+  const int bw = 8 << bwl, bh = 8 << bhl;
   int x, y;
   const int stride = mb->block[16].dst_stride;
-  const int16_t *u_diff = mb->diff + y_offset;
-  const int16_t *v_diff = mb->diff + y_offset + size*size;
+  const int16_t *u_diff = mb->diff + uoff;
+  const int16_t *v_diff = mb->diff + voff;
 
-  for (y = 0; y < size; y++) {
-    for (x = 0; x < size; x++) {
+  for (y = 0; y < bh; y++) {
+    for (x = 0; x < bw; x++) {
       u_dst[x] = clip_pixel(u_dst[x] + u_diff[x]);
       v_dst[x] = clip_pixel(v_dst[x] + v_diff[x]);
     }
 
     u_dst += stride;
     v_dst += stride;
-    u_diff += size;
-    v_diff += size;
+    u_diff += bw;
+    v_diff += bw;
   }
 }
 
-void vp9_recon_sby_s_c(MACROBLOCKD *mb, uint8_t *dst) {
-  recon_sby(mb, dst, 32);
-}
-
-void vp9_recon_sbuv_s_c(MACROBLOCKD *mb, uint8_t *u_dst, uint8_t *v_dst) {
-  recon_sbuv(mb, u_dst, v_dst, 1024, 16);
-}
-
-void vp9_recon_sb64y_s_c(MACROBLOCKD *mb, uint8_t *dst) {
-  recon_sby(mb, dst, 64);
-}
-
-void vp9_recon_sb64uv_s_c(MACROBLOCKD *mb, uint8_t *u_dst, uint8_t *v_dst) {
-  recon_sbuv(mb, u_dst, v_dst, 4096, 32);
-}
-
 void vp9_recon_mby_c(MACROBLOCKD *xd) {
   int i;
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index cf95524..d98b947 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -5,6 +5,7 @@ cat <mbmi.mb_skip_coeff) {
-    vp9_reset_sb64_tokens_context(xd);
+    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_SB64X64);
 
     // Special case:  Force the loopfilter to skip when eobtotal and
     // mb_skip_coeff are zero.
@@ -753,7 +753,7 @@ static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
     mb_init_dequantizer(pbi, xd);
 
   if (mi->mbmi.mb_skip_coeff) {
-    vp9_reset_sb_tokens_context(xd);
+    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_SB32X32);
 
     // Special case:  Force the loopfilter to skip when eobtotal and
     // mb_skip_coeff are zero.
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index b2021d7..7c50756 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -163,8 +163,16 @@ struct macroblock {
   // Structure to hold context for each of the 4 MBs within a SB:
   // when encoded as 4 independent MBs:
   PICK_MODE_CONTEXT mb_context[4][4];
+#if CONFIG_SBSEGMENT
+  PICK_MODE_CONTEXT sb32x16_context[4][2];
+  PICK_MODE_CONTEXT sb16x32_context[4][2];
+#endif
   // when 4 MBs share coding parameters:
   PICK_MODE_CONTEXT sb32_context[4];
+#if CONFIG_SBSEGMENT
+  PICK_MODE_CONTEXT sb32x64_context[2];
+  PICK_MODE_CONTEXT sb64x32_context[2];
+#endif
   PICK_MODE_CONTEXT sb64_context;
 
   void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index d827060..19bc168 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -50,11 +50,9 @@ void vp9_select_interp_filter_type(VP9_COMP *cpi);
 static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
                               int output_enabled, int mb_row, int mb_col);
 
-static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
-                                int output_enabled, int mb_row, int mb_col);
-
-static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
-                                int output_enabled, int mb_row, int mb_col);
+static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
+                              int output_enabled, int mb_row, int mb_col,
+                              BLOCK_SIZE_TYPE bsize);
 
 static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x);
 
@@ -429,7 +427,8 @@ static unsigned int pick_best_mv_ref(MACROBLOCK *x,
 #endif
 
 static void update_state(VP9_COMP *cpi,
-                         PICK_MODE_CONTEXT *ctx, int block_size,
+                         PICK_MODE_CONTEXT *ctx,
+                         BLOCK_SIZE_TYPE bsize,
                          int output_enabled) {
   int i, x_idx, y;
   MACROBLOCK *const x = &cpi->mb;
@@ -439,28 +438,28 @@ static void update_state(VP9_COMP *cpi,
   int mb_mode = mi->mbmi.mode;
   int mb_mode_index = ctx->best_mode_index;
   const int mis = cpi->common.mode_info_stride;
-  int mb_block_size = 1 << mi->mbmi.sb_type;
+  const int bh = 1 << mb_height_log2(bsize), bw = 1 << mb_width_log2(bsize);
 
 #if CONFIG_DEBUG
   assert(mb_mode < MB_MODE_COUNT);
   assert(mb_mode_index < MAX_MODES);
   assert(mi->mbmi.ref_frame < MAX_REF_FRAMES);
 #endif
-  assert(mi->mbmi.sb_type == (block_size >> 5));
+  assert(mi->mbmi.sb_type == bsize);
 
   // Restore the coding context of the MB to that that was in place
   // when the mode was picked for it
-  for (y = 0; y < mb_block_size; y++) {
-    for (x_idx = 0; x_idx < mb_block_size; x_idx++) {
-      if ((xd->mb_to_right_edge >> 7) + mb_block_size > x_idx &&
-          (xd->mb_to_bottom_edge >> 7) + mb_block_size > y) {
+  for (y = 0; y < bh; y++) {
+    for (x_idx = 0; x_idx < bw; x_idx++) {
+      if ((xd->mb_to_right_edge >> 7) + bw > x_idx &&
+          (xd->mb_to_bottom_edge >> 7) + bh > y) {
         MODE_INFO *mi_addr = xd->mode_info_context + x_idx + y * mis;
 
         vpx_memcpy(mi_addr, mi, sizeof(MODE_INFO));
       }
     }
   }
-  if (block_size == 16) {
+  if (bsize < BLOCK_SIZE_SB32X32) {
     ctx->txfm_rd_diff[ALLOW_32X32] = ctx->txfm_rd_diff[ALLOW_16X16];
   }
@@ -602,10 +601,11 @@ static void update_state(VP9_COMP *cpi,
   }
 }
 
-static unsigned find_seg_id(uint8_t *buf, int block_size,
+static unsigned find_seg_id(uint8_t *buf, BLOCK_SIZE_TYPE bsize,
                             int start_y, int height, int start_x, int width) {
-  const int end_x = MIN(start_x + block_size, width);
-  const int end_y = MIN(start_y + block_size, height);
+  const int bw = 1 << mb_width_log2(bsize), bh = 1 << mb_height_log2(bsize);
+  const int end_x = MIN(start_x + bw, width);
+  const int end_y = MIN(start_y + bh, height);
   int x, y;
   unsigned seg_id = -1;
 
@@ -620,7 +620,7 @@ static unsigned find_seg_id(uint8_t *buf, int block_size,
 }
 
 static void set_offsets(VP9_COMP *cpi,
-                        int mb_row, int mb_col, int block_size) {
+                        int mb_row, int mb_col, BLOCK_SIZE_TYPE bsize) {
   MACROBLOCK *const x = &cpi->mb;
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -628,6 +628,7 @@ static void set_offsets(VP9_COMP *cpi,
   const int dst_fb_idx = cm->new_fb_idx;
   const int idx_map = mb_row * cm->mb_cols + mb_col;
   const int idx_str = xd->mode_info_stride * mb_row + mb_col;
+  const int bw = 1 << mb_width_log2(bsize), bh = 1 << mb_height_log2(bsize);
 
   // entropy context structures
   xd->above_context = cm->above_context + mb_col;
@@ -656,15 +657,14 @@ static void set_offsets(VP9_COMP *cpi,
   x->mv_row_min = -((mb_row * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
   x->mv_col_min = -((mb_col * 16) + VP9BORDERINPIXELS - VP9_INTERP_EXTEND);
   x->mv_row_max = ((cm->mb_rows - mb_row) * 16 +
-                   (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND));
+                   (VP9BORDERINPIXELS - 16 * bh - VP9_INTERP_EXTEND));
   x->mv_col_max = ((cm->mb_cols - mb_col) * 16 +
-                   (VP9BORDERINPIXELS - block_size - VP9_INTERP_EXTEND));
+                   (VP9BORDERINPIXELS - 16 * bw - VP9_INTERP_EXTEND));
 
   // Set up distance of MB to edge of frame in 1/8th pel units
-  block_size >>= 4;  // in macroblock units
-  assert(!(mb_col & (block_size - 1)) && !(mb_row & (block_size - 1)));
-  set_mb_row(cm, xd, mb_row, block_size);
-  set_mb_col(cm, xd, mb_col, block_size);
+  assert(!(mb_col & (bw - 1)) && !(mb_row & (bh - 1)));
+  set_mb_row(cm, xd, mb_row, bh);
+  set_mb_col(cm, xd, mb_col, bw);
 
   /* set up source buffers */
   setup_pred_block(&x->src, cpi->Source, mb_row, mb_col, NULL, NULL);
@@ -676,10 +676,10 @@ static void set_offsets(VP9_COMP *cpi,
   /* segment ID */
   if (xd->segmentation_enabled) {
     if (xd->update_mb_segmentation_map) {
-      mbmi->segment_id = find_seg_id(cpi->segmentation_map, block_size,
+      mbmi->segment_id = find_seg_id(cpi->segmentation_map, bsize,
                                     mb_row, cm->mb_rows, mb_col, cm->mb_cols);
    } else {
-      mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, block_size,
+      mbmi->segment_id = find_seg_id(cm->last_frame_seg_map, bsize,
                                     mb_row, cm->mb_rows, mb_col, cm->mb_cols);
    }
    assert(mbmi->segment_id <= 3);
@@ -747,7 +747,7 @@ static int pick_mb_modes(VP9_COMP *cpi,
     // Index of the MB in the SB 0..3
     xd->mb_index = i;
 
-    set_offsets(cpi, mb_row, mb_col, 16);
+    set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_MB16X16);
 
     if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
       vp9_activity_masking(cpi, x);
@@ -832,7 +832,7 @@ static void pick_sb_modes(VP9_COMP *cpi,
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
 
-  set_offsets(cpi, mb_row, mb_col, 32);
+  set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB32X32);
   xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB32X32;
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
     vp9_activity_masking(cpi, x);
@@ -862,7 +862,7 @@ static void pick_sb64_modes(VP9_COMP *cpi,
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
 
-  set_offsets(cpi, mb_row, mb_col, 64);
+  set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB64X64);
   xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_SB64X64;
   if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
     vp9_activity_masking(cpi, x);
@@ -949,11 +949,12 @@ static void encode_sb(VP9_COMP *cpi,
 
   cpi->sb32_count[is_sb]++;
   if (is_sb) {
-    set_offsets(cpi, mb_row, mb_col, 32);
-    update_state(cpi, &x->sb32_context[xd->sb_index], 32, output_enabled);
+    set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB32X32);
+    update_state(cpi, &x->sb32_context[xd->sb_index],
+                 BLOCK_SIZE_SB32X32, output_enabled);
 
-    encode_superblock32(cpi, tp,
-                        output_enabled, mb_row, mb_col);
+    encode_superblock(cpi, tp,
+                      output_enabled, mb_row, mb_col, BLOCK_SIZE_SB32X32);
     if (output_enabled) {
       update_stats(cpi, mb_row, mb_col);
     }
@@ -975,9 +976,10 @@ static void encode_sb(VP9_COMP *cpi,
         continue;
       }
 
-      set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, 16);
+      set_offsets(cpi, mb_row + y_idx, mb_col + x_idx, BLOCK_SIZE_MB16X16);
       xd->mb_index = i;
-      update_state(cpi, &x->mb_context[xd->sb_index][i], 16, output_enabled);
+      update_state(cpi, &x->mb_context[xd->sb_index][i],
+                   BLOCK_SIZE_MB16X16, output_enabled);
 
       if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
         vp9_activity_masking(cpi, x);
@@ -1018,10 +1020,10 @@ static void encode_sb64(VP9_COMP *cpi,
 
   cpi->sb64_count[is_sb[0] == 2]++;
   if (is_sb[0] == 2) {
-    set_offsets(cpi, mb_row, mb_col, 64);
-    update_state(cpi, &x->sb64_context, 64, 1);
-    encode_superblock64(cpi, tp,
-                        1, mb_row, mb_col);
+    set_offsets(cpi, mb_row, mb_col, BLOCK_SIZE_SB64X64);
+    update_state(cpi, &x->sb64_context, BLOCK_SIZE_SB64X64, 1);
+    encode_superblock(cpi, tp,
+                      1, mb_row, mb_col, BLOCK_SIZE_SB64X64);
     update_stats(cpi, mb_row, mb_col);
 
     (*tp)->Token = EOSB_TOKEN;
@@ -1380,23 +1382,6 @@ static int check_dual_ref_flags(VP9_COMP *cpi) {
   }
 }
 
-static void reset_skip_txfm_size_mb(VP9_COMP *cpi,
-                                    MODE_INFO *mi, TX_SIZE txfm_max) {
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
-
-  if (mbmi->txfm_size > txfm_max) {
-    VP9_COMMON *const cm = &cpi->common;
-    MACROBLOCK *const x = &cpi->mb;
-    MACROBLOCKD *const xd = &x->e_mbd;
-    const int segment_id = mbmi->segment_id;
-
-    xd->mode_info_context = mi;
-    assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
-           (cm->mb_no_coeff_skip && mbmi->mb_skip_coeff));
-    mbmi->txfm_size = txfm_max;
-  }
-}
-
 static int get_skip_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs) {
   int x, y;
 
@@ -1420,29 +1405,10 @@ static void set_txfm_flag(MODE_INFO *mi, int mis, int ymbs, int xmbs,
   }
 }
 
-static void reset_skip_txfm_size_sb32(VP9_COMP *cpi, MODE_INFO *mi,
-                                      int mis, TX_SIZE txfm_max,
-                                      int mb_rows_left, int mb_cols_left) {
-  MB_MODE_INFO *const mbmi = &mi->mbmi;
-
-  if (mbmi->txfm_size > txfm_max) {
-    VP9_COMMON *const cm = &cpi->common;
-    MACROBLOCK *const x = &cpi->mb;
-    MACROBLOCKD *const xd = &x->e_mbd;
-    const int segment_id = mbmi->segment_id;
-    const int ymbs = MIN(2, mb_rows_left);
-    const int xmbs = MIN(2, mb_cols_left);
-
-    xd->mode_info_context = mi;
-    assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
-           (cm->mb_no_coeff_skip && get_skip_flag(mi, mis, ymbs, xmbs)));
-    set_txfm_flag(mi, mis, ymbs, xmbs, txfm_max);
-  }
-}
-
-static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi,
-                                      int mis, TX_SIZE txfm_max,
-                                      int mb_rows_left, int mb_cols_left) {
+static void reset_skip_txfm_size_sb(VP9_COMP *cpi, MODE_INFO *mi,
+                                    int mis, TX_SIZE txfm_max,
+                                    int mb_rows_left, int mb_cols_left,
+                                    BLOCK_SIZE_TYPE bsize) {
   MB_MODE_INFO *const mbmi = &mi->mbmi;
 
   if (mbmi->txfm_size > txfm_max) {
@@ -1450,8 +1416,9 @@ static void reset_skip_txfm_size_sb64(VP9_COMP *cpi, MODE_INFO *mi,
     MACROBLOCK *const x = &cpi->mb;
     MACROBLOCKD *const xd = &x->e_mbd;
     const int segment_id = mbmi->segment_id;
-    const int ymbs = MIN(4, mb_rows_left);
-    const int xmbs = MIN(4, mb_cols_left);
+    const int bh = 1 << mb_height_log2(bsize), bw = 1 << mb_width_log2(bsize);
+    const int ymbs = MIN(bh, mb_rows_left);
+    const int xmbs = MIN(bw, mb_cols_left);
 
     xd->mode_info_context = mi;
     assert((vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)) ||
@@ -1470,8 +1437,9 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
     mi = mi_ptr;
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 4, mi += 4) {
       if (mi->mbmi.sb_type == BLOCK_SIZE_SB64X64) {
-        reset_skip_txfm_size_sb64(cpi, mi, mis, txfm_max,
-                                  cm->mb_rows - mb_row, cm->mb_cols - mb_col);
+        reset_skip_txfm_size_sb(cpi, mi, mis, txfm_max,
+                                cm->mb_rows - mb_row, cm->mb_cols - mb_col,
+                                BLOCK_SIZE_SB64X64);
       } else {
         int i;
 
@@ -1484,9 +1452,10 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
            continue;
 
          if (sb_mi->mbmi.sb_type) {
-            reset_skip_txfm_size_sb32(cpi, sb_mi, mis, txfm_max,
-                                      cm->mb_rows - mb_row - y_idx_sb,
-                                      cm->mb_cols - mb_col - x_idx_sb);
+            reset_skip_txfm_size_sb(cpi, sb_mi, mis, txfm_max,
+                                    cm->mb_rows - mb_row - y_idx_sb,
+                                    cm->mb_cols - mb_col - x_idx_sb,
+                                    BLOCK_SIZE_SB32X32);
          } else {
            int m;
 
@@ -1500,7 +1469,10 @@ static void reset_skip_txfm_size(VP9_COMP *cpi, TX_SIZE txfm_max) {
               mb_mi = mi + y_idx * mis + x_idx;
               assert(mb_mi->mbmi.sb_type == BLOCK_SIZE_MB16X16);
-              reset_skip_txfm_size_mb(cpi, mb_mi, txfm_max);
+              reset_skip_txfm_size_sb(cpi, mb_mi, mis, txfm_max,
+                                      cm->mb_rows - mb_row - y_idx,
+                                      cm->mb_cols - mb_col - x_idx,
+                                      BLOCK_SIZE_MB16X16);
            }
          }
        }
@@ -1804,117 +1776,6 @@ static void adjust_act_zbin(VP9_COMP *cpi, MACROBLOCK *x) {
 #endif
 }
 
-static void update_sb64_skip_coeff_state(VP9_COMP *cpi,
-                                         ENTROPY_CONTEXT_PLANES ta[16],
-                                         ENTROPY_CONTEXT_PLANES tl[16],
-                                         TOKENEXTRA *t[16],
-                                         TOKENEXTRA **tp,
-                                         int skip[16], int output_enabled) {
-  MACROBLOCK *const x = &cpi->mb;
-
-  if (x->e_mbd.mode_info_context->mbmi.txfm_size == TX_32X32) {
-    TOKENEXTRA tokens[4][1024+512];
-    int n_tokens[4], n;
-
-    // if there were no skips, we don't need to do anything
-    if (!skip[0] && !skip[1] && !skip[2] && !skip[3])
-      return;
-
-    // if we don't do coeff skipping for this frame, we don't
-    // need to do anything here
-    if (!cpi->common.mb_no_coeff_skip)
-      return;
-
-    // if all 4 MBs skipped coeff coding, nothing to be done
-    if (skip[0] && skip[1] && skip[2] && skip[3])
-      return;
-
-    // so the situation now is that we want to skip coeffs
-    // for some MBs, but not all, and we didn't code EOB
-    // coefficients for them. However, the skip flag for this
-    // SB will be 0 overall, so we need to insert EOBs in the
-    // middle of the token tree. Do so here.
-    for (n = 0; n < 4; n++) {
-      if (n < 3) {
-        n_tokens[n] = t[n + 1] - t[n];
-      } else {
-        n_tokens[n] = *tp - t[3];
-      }
-      if (n_tokens[n]) {
-        memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0]));
-      }
-    }
-
-    // reset pointer, stuff EOBs where necessary
-    *tp = t[0];
-    for (n = 0; n < 4; n++) {
-      if (skip[n]) {
-        x->e_mbd.above_context = &ta[n * 2];
-        x->e_mbd.left_context = &tl[n * 2];
-        vp9_stuff_sb(cpi, &x->e_mbd, tp, !output_enabled);
-      } else {
-        if (n_tokens[n]) {
-          memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
-        }
-        (*tp) += n_tokens[n];
-      }
-    }
-  } else {
-    TOKENEXTRA tokens[16][16 * 25];
-    int n_tokens[16], n;
-
-    // if there were no skips, we don't need to do anything
-    if (!skip[ 0] && !skip[ 1] && !skip[ 2] && !skip[ 3] &&
-        !skip[ 4] && !skip[ 5] && !skip[ 6] && !skip[ 7] &&
-        !skip[ 8] && !skip[ 9] && !skip[10] && !skip[11] &&
-        !skip[12] && !skip[13] && !skip[14] && !skip[15])
-      return;
-
-    // if we don't do coeff skipping for this frame, we don't
-    // need to do anything here
-    if (!cpi->common.mb_no_coeff_skip)
-      return;
-
-    // if all 4 MBs skipped coeff coding, nothing to be done
-    if (skip[ 0] && skip[ 1] && skip[ 2] && skip[ 3] &&
-        skip[ 4] && skip[ 5] && skip[ 6] && skip[ 7] &&
-        skip[ 8] && skip[ 9] && skip[10] && skip[11] &&
-        skip[12] && skip[13] && skip[14] && skip[15])
-      return;
-
-    // so the situation now is that we want to skip coeffs
-    // for some MBs, but not all, and we didn't code EOB
-    // coefficients for them. However, the skip flag for this
-    // SB will be 0 overall, so we need to insert EOBs in the
-    // middle of the token tree. Do so here.
-    for (n = 0; n < 16; n++) {
-      if (n < 15) {
-        n_tokens[n] = t[n + 1] - t[n];
-      } else {
-        n_tokens[n] = *tp - t[15];
-      }
-      if (n_tokens[n]) {
-        memcpy(tokens[n], t[n], n_tokens[n] * sizeof(*t[0]));
-      }
-    }
-
-    // reset pointer, stuff EOBs where necessary
-    *tp = t[0];
-    for (n = 0; n < 16; n++) {
-      if (skip[n]) {
-        x->e_mbd.above_context = &ta[n];
-        x->e_mbd.left_context = &tl[n];
-        vp9_stuff_mb(cpi, &x->e_mbd, tp, !output_enabled);
-      } else {
-        if (n_tokens[n]) {
-          memcpy(*tp, tokens[n], sizeof(*t[0]) * n_tokens[n]);
-        }
-        (*tp) += n_tokens[n];
-      }
-    }
-  }
-}
-
 #if CONFIG_CODE_NONZEROCOUNT
 static void gather_nzcs_mb16(VP9_COMMON *const cm,
                              MACROBLOCKD *xd) {
@@ -2300,231 +2161,9 @@ static void encode_macroblock(VP9_COMP *cpi, TOKENEXTRA **t,
   }
 }
 
-static void encode_superblock32(VP9_COMP *cpi, TOKENEXTRA **t,
-                                int output_enabled, int mb_row, int mb_col) {
-  VP9_COMMON *const cm = &cpi->common;
-  MACROBLOCK *const x = &cpi->mb;
-  MACROBLOCKD *const xd = &x->e_mbd;
-  const uint8_t *src = x->src.y_buffer;
-  uint8_t *dst = xd->dst.y_buffer;
-  const uint8_t *usrc = x->src.u_buffer;
-  uint8_t *udst = xd->dst.u_buffer;
-  const uint8_t *vsrc = x->src.v_buffer;
-  uint8_t *vdst = xd->dst.v_buffer;
-  int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride;
-  int src_uv_stride = x->src.uv_stride, dst_uv_stride = xd->dst.uv_stride;
-  unsigned char ref_pred_flag;
-  MODE_INFO *mi = x->e_mbd.mode_info_context;
-  unsigned int segment_id = mi->mbmi.segment_id;
-  const int mis = cm->mode_info_stride;
-
-#ifdef ENC_DEBUG
-  enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
-               mb_row == 8 && mb_col == 0 && output_enabled);
-  if (enc_debug) {
-    printf("Encode SB32 %d %d output %d\n", mb_row, mb_col, output_enabled);
-    printf("Mode %d skip %d tx_size %d ref %d ref2 %d mv %d %d interp %d\n",
-           mi->mbmi.mode, x->skip, mi->mbmi.txfm_size,
-           mi->mbmi.ref_frame, mi->mbmi.second_ref_frame,
-           mi->mbmi.mv[0].as_mv.row, mi->mbmi.mv[0].as_mv.col,
-           mi->mbmi.interp_filter);
-  }
-#endif
-  if (cm->frame_type == KEY_FRAME) {
-    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-      adjust_act_zbin(cpi, x);
-      vp9_update_zbin_extra(cpi, x);
-    }
-  } else {
-    vp9_setup_interp_filters(xd, xd->mode_info_context->mbmi.interp_filter, cm);
-
-    if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
-      // Adjust the zbin based on this MB rate.
-      adjust_act_zbin(cpi, x);
-    }
-
-    // Experimental code. Special case for gf and arf zeromv modes.
-    // Increase zbin size to suppress noise
-    cpi->zbin_mode_boost = 0;
-    if (cpi->zbin_mode_boost_enabled) {
-      if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) {
-        if (xd->mode_info_context->mbmi.mode == ZEROMV) {
-          if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
-            cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
-          else
-            cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
-        } else if (xd->mode_info_context->mbmi.mode == SPLITMV)
-          cpi->zbin_mode_boost = SPLIT_MV_ZBIN_BOOST;
-        else
-          cpi->zbin_mode_boost = MV_ZBIN_BOOST;
-      } else {
-        cpi->zbin_mode_boost = INTRA_ZBIN_BOOST;
-      }
-    }
-
-    vp9_update_zbin_extra(cpi, x);
-
-    // SET VARIOUS PREDICTION FLAGS
-    // Did the chosen reference frame match its predicted value.
-    ref_pred_flag = ((xd->mode_info_context->mbmi.ref_frame ==
-                      vp9_get_pred_ref(cm, xd)));
-    vp9_set_pred_flag(xd, PRED_REF, ref_pred_flag);
-  }
-
-
-  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
-    vp9_build_intra_predictors_sby_s(&x->e_mbd);
-    vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
-    if (output_enabled)
-      sum_intra_stats(cpi, x);
-  } else {
-    int ref_fb_idx;
-
-    assert(cm->frame_type != KEY_FRAME);
-
-    if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
-      ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
-    else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
-      ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
-    else
-      ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
-
-    setup_pred_block(&xd->pre,
-                     &cpi->common.yv12_fb[ref_fb_idx],
-                     mb_row, mb_col,
-                     &xd->scale_factor[0], &xd->scale_factor_uv[0]);
-
-    if (xd->mode_info_context->mbmi.second_ref_frame > 0) {
-      int second_ref_fb_idx;
-
-      if (xd->mode_info_context->mbmi.second_ref_frame == LAST_FRAME)
-        second_ref_fb_idx = cpi->common.ref_frame_map[cpi->lst_fb_idx];
-      else if (xd->mode_info_context->mbmi.second_ref_frame == GOLDEN_FRAME)
-        second_ref_fb_idx = cpi->common.ref_frame_map[cpi->gld_fb_idx];
-      else
-        second_ref_fb_idx = cpi->common.ref_frame_map[cpi->alt_fb_idx];
-
-      setup_pred_block(&xd->second_pre,
-                       &cpi->common.yv12_fb[second_ref_fb_idx],
-                       mb_row, mb_col,
-                       &xd->scale_factor[1], &xd->scale_factor_uv[1]);
-    }
-
-    vp9_build_inter32x32_predictors_sb(xd, mb_row, mb_col);
-  }
-
-  if (!x->skip) {
-    vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride,
-                         dst, dst_y_stride);
-    vp9_subtract_sbuv_s_c(x->src_diff,
-                          usrc, vsrc, src_uv_stride,
-                          udst, vdst, dst_uv_stride);
-    switch (mi->mbmi.txfm_size) {
-      case TX_32X32:
-        vp9_transform_sby_32x32(x);
-        vp9_transform_sbuv_16x16(x);
-        vp9_quantize_sby_32x32(x);
-        vp9_quantize_sbuv_16x16(x);
-        if (x->optimize) {
-          vp9_optimize_sby_32x32(cm, x);
-          vp9_optimize_sbuv_16x16(cm, x);
-        }
-        vp9_inverse_transform_sby_32x32(xd);
-        vp9_inverse_transform_sbuv_16x16(xd);
-        break;
-      case TX_16X16:
-        vp9_transform_sby_16x16(x);
-        vp9_transform_sbuv_16x16(x);
-        vp9_quantize_sby_16x16(x);
-        vp9_quantize_sbuv_16x16(x);
-        if (x->optimize) {
-          vp9_optimize_sby_16x16(cm, x);
-          vp9_optimize_sbuv_16x16(cm, x);
-        }
-        vp9_inverse_transform_sby_16x16(xd);
-        vp9_inverse_transform_sbuv_16x16(xd);
-        break;
-      case TX_8X8:
-        vp9_transform_sby_8x8(x);
-        vp9_transform_sbuv_8x8(x);
-        vp9_quantize_sby_8x8(x);
-        vp9_quantize_sbuv_8x8(x);
-        if (x->optimize) {
-          vp9_optimize_sby_8x8(cm, x);
-          vp9_optimize_sbuv_8x8(cm, x);
-        }
-        vp9_inverse_transform_sby_8x8(xd);
-        vp9_inverse_transform_sbuv_8x8(xd);
-        break;
-      case TX_4X4:
-        vp9_transform_sby_4x4(x);
-        vp9_transform_sbuv_4x4(x);
-        vp9_quantize_sby_4x4(x);
-        vp9_quantize_sbuv_4x4(x);
-        if (x->optimize) {
-          vp9_optimize_sby_4x4(cm, x);
-          vp9_optimize_sbuv_4x4(cm, x);
-        }
-        vp9_inverse_transform_sby_4x4(xd);
-        vp9_inverse_transform_sbuv_4x4(xd);
-        break;
-      default: assert(0);
-    }
-    vp9_recon_sby_s_c(xd, dst);
-    vp9_recon_sbuv_s_c(xd, udst, vdst);
-#if CONFIG_CODE_NONZEROCOUNT
-    gather_nzcs_sb32(cm, xd);
-#endif
-
-    vp9_tokenize_sb(cpi, xd, t, !output_enabled);
-  } else {
-    // FIXME(rbultje): not tile-aware (mi - 1)
-    int mb_skip_context = cm->mb_no_coeff_skip ?
-          (mi - 1)->mbmi.mb_skip_coeff + (mi - mis)->mbmi.mb_skip_coeff : 0;
-
-    mi->mbmi.mb_skip_coeff = 1;
-    if (cm->mb_no_coeff_skip) {
-      if (output_enabled)
-        cpi->skip_true_count[mb_skip_context]++;
-      vp9_reset_sb_tokens_context(xd);
-    } else {
-      vp9_stuff_sb(cpi, xd, t, !output_enabled);
-      if (output_enabled)
-        cpi->skip_false_count[mb_skip_context]++;
-    }
-  }
-
-  // copy skip flag on all mb_mode_info contexts in this SB
-  // if this was a skip at this txfm size
-  if (mb_col < cm->mb_cols - 1)
-    mi[1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
-  if (mb_row < cm->mb_rows - 1) {
-    mi[mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
-    if (mb_col < cm->mb_cols - 1)
-      mi[mis + 1].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
-  }
-
-  if (output_enabled) {
-    if (cm->txfm_mode == TX_MODE_SELECT &&
-        !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
-          (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
-      cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
-    } else {
-      TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
-      mi->mbmi.txfm_size = sz;
-      if (mb_col < cm->mb_cols - 1)
-        mi[1].mbmi.txfm_size = sz;
-      if (mb_row < cm->mb_rows - 1) {
-        mi[mis].mbmi.txfm_size = sz;
-        if (mb_col < cm->mb_cols - 1)
-          mi[mis + 1].mbmi.txfm_size = sz;
-      }
-    }
-  }
-}
-
-static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
-                                int output_enabled, int mb_row, int mb_col) {
+static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t,
+                              int output_enabled, int mb_row, int mb_col,
+                              BLOCK_SIZE_TYPE bsize) {
   VP9_COMMON *const cm = &cpi->common;
   MACROBLOCK *const x = &cpi->mb;
   MACROBLOCKD *const xd = &x->e_mbd;
@@ -2541,13 +2180,9 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
   MODE_INFO *mi = x->e_mbd.mode_info_context;
   unsigned int segment_id = mi->mbmi.segment_id;
   const int mis = cm->mode_info_stride;
+  const int bwl = mb_width_log2(bsize);
+  const int bw = 1 << bwl, bh = 1 << mb_height_log2(bsize);
 
-#ifdef ENC_DEBUG
-  enc_debug = (cpi->common.current_video_frame == 11 && cm->show_frame &&
-               mb_row == 8 && mb_col == 0 && output_enabled);
-  if (enc_debug)
-    printf("Encode SB64 %d %d output %d\n", mb_row, mb_col, output_enabled);
-#endif
   if (cm->frame_type == KEY_FRAME) {
     if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
       adjust_act_zbin(cpi, x);
@@ -2590,8 +2225,13 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
   }
 
   if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
-    vp9_build_intra_predictors_sb64y_s(&x->e_mbd);
-    vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);
+    if (bsize == BLOCK_SIZE_SB32X32) {
+      vp9_build_intra_predictors_sby_s(&x->e_mbd);
+      vp9_build_intra_predictors_sbuv_s(&x->e_mbd);
+    } else {
+      vp9_build_intra_predictors_sb64y_s(&x->e_mbd);
+      vp9_build_intra_predictors_sb64uv_s(&x->e_mbd);
+    }
     if (output_enabled)
       sum_intra_stats(cpi, x);
   } else {
@@ -2627,71 +2267,102 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
                        &xd->scale_factor[1], &xd->scale_factor_uv[1]);
     }
 
-    vp9_build_inter64x64_predictors_sb(xd, mb_row, mb_col);
+    if (bsize == BLOCK_SIZE_SB32X32) {
+      vp9_build_inter32x32_predictors_sb(xd, mb_row, mb_col);
+    } else {
+      vp9_build_inter64x64_predictors_sb(xd, mb_row, mb_col);
+    }
   }
 
   if (!x->skip) {
-    vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride);
-    vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
-                            udst, vdst, dst_uv_stride);
+    vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride,
+                         bsize);
+    vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride,
+                          udst, vdst, dst_uv_stride, bsize);
     switch (xd->mode_info_context->mbmi.txfm_size) {
       case TX_32X32:
-        vp9_transform_sb64y_32x32(x);
-        vp9_transform_sb64uv_32x32(x);
-        vp9_quantize_sb64y_32x32(x);
-        vp9_quantize_sb64uv_32x32(x);
+        vp9_transform_sby_32x32(x, bsize);
+        vp9_quantize_sby_32x32(x, bsize);
+        if (bsize == BLOCK_SIZE_SB64X64) {
+          vp9_transform_sbuv_32x32(x, bsize);
+          vp9_quantize_sbuv_32x32(x, bsize);
+        } else {
+          vp9_transform_sbuv_16x16(x, bsize);
+          vp9_quantize_sbuv_16x16(x, bsize);
+        }
        if (x->optimize) {
-          vp9_optimize_sb64y_32x32(cm, x);
-          vp9_optimize_sb64uv_32x32(cm, x);
+          vp9_optimize_sby_32x32(cm, x, bsize);
+          if (bsize == BLOCK_SIZE_SB64X64)
+            vp9_optimize_sbuv_32x32(cm, x, bsize);
+          else
+            vp9_optimize_sbuv_16x16(cm, x, bsize);
        }
-        vp9_inverse_transform_sb64y_32x32(xd);
-        vp9_inverse_transform_sb64uv_32x32(xd);
+        vp9_inverse_transform_sby_32x32(xd, bsize);
+        if (bsize == BLOCK_SIZE_SB64X64)
+          vp9_inverse_transform_sbuv_32x32(xd, bsize);
+        else
+          vp9_inverse_transform_sbuv_16x16(xd, bsize);
        break;
      case TX_16X16:
-        vp9_transform_sb64y_16x16(x);
-        vp9_transform_sb64uv_16x16(x);
-        vp9_quantize_sb64y_16x16(x);
-        vp9_quantize_sb64uv_16x16(x);
+        vp9_transform_sby_16x16(x, bsize);
+        vp9_quantize_sby_16x16(x, bsize);
+        if (bsize >= BLOCK_SIZE_SB32X32) {
+          vp9_transform_sbuv_16x16(x, bsize);
+          vp9_quantize_sbuv_16x16(x, bsize);
+        } else {
+          vp9_transform_sbuv_8x8(x, bsize);
+          vp9_quantize_sbuv_8x8(x, bsize);
+        }
        if (x->optimize) {
-          vp9_optimize_sb64y_16x16(cm, x);
-          vp9_optimize_sb64uv_16x16(cm, x);
+          vp9_optimize_sby_16x16(cm, x, bsize);
+          if (bsize >= BLOCK_SIZE_SB32X32)
+            vp9_optimize_sbuv_16x16(cm, x, bsize);
+          else
+            vp9_optimize_sbuv_8x8(cm, x, bsize);
        }
-        vp9_inverse_transform_sb64y_16x16(xd);
-        vp9_inverse_transform_sb64uv_16x16(xd);
+        vp9_inverse_transform_sby_16x16(xd, bsize);
+        if (bsize >= BLOCK_SIZE_SB32X32)
+          vp9_inverse_transform_sbuv_16x16(xd, bsize);
+        else
+          vp9_inverse_transform_sbuv_8x8(xd, bsize);
        break;
      case TX_8X8:
-        vp9_transform_sb64y_8x8(x);
-        vp9_transform_sb64uv_8x8(x);
-        vp9_quantize_sb64y_8x8(x);
-        vp9_quantize_sb64uv_8x8(x);
+        vp9_transform_sby_8x8(x, bsize);
+        vp9_transform_sbuv_8x8(x, bsize);
+        vp9_quantize_sby_8x8(x, bsize);
+        vp9_quantize_sbuv_8x8(x, bsize);
        if (x->optimize) {
-          vp9_optimize_sb64y_8x8(cm, x);
-          vp9_optimize_sb64uv_8x8(cm, x);
+          vp9_optimize_sby_8x8(cm, x, bsize);
+          vp9_optimize_sbuv_8x8(cm, x, bsize);
        }
-        vp9_inverse_transform_sb64y_8x8(xd);
-        vp9_inverse_transform_sb64uv_8x8(xd);
+        vp9_inverse_transform_sby_8x8(xd, bsize);
+        vp9_inverse_transform_sbuv_8x8(xd, bsize);
        break;
      case TX_4X4:
-        vp9_transform_sb64y_4x4(x);
-        vp9_transform_sb64uv_4x4(x);
-        vp9_quantize_sb64y_4x4(x);
-        vp9_quantize_sb64uv_4x4(x);
+        vp9_transform_sby_4x4(x, bsize);
+        vp9_transform_sbuv_4x4(x, bsize);
+        vp9_quantize_sby_4x4(x, bsize);
+        vp9_quantize_sbuv_4x4(x, bsize);
        if (x->optimize) {
-          vp9_optimize_sb64y_4x4(cm, x);
-          vp9_optimize_sb64uv_4x4(cm, x);
+          vp9_optimize_sby_4x4(cm, x, bsize);
+          vp9_optimize_sbuv_4x4(cm, x, bsize);
        }
-        vp9_inverse_transform_sb64y_4x4(xd);
-        vp9_inverse_transform_sb64uv_4x4(xd);
+        vp9_inverse_transform_sby_4x4(xd, bsize);
+        vp9_inverse_transform_sbuv_4x4(xd, bsize);
        break;
      default: assert(0);
    }
-    vp9_recon_sb64y_s_c(xd, dst);
-    vp9_recon_sb64uv_s_c(&x->e_mbd, udst, vdst);
+    vp9_recon_sby_s_c(xd, dst, bsize);
+    vp9_recon_sbuv_s_c(&x->e_mbd, udst, vdst, bsize);
 #if CONFIG_CODE_NONZEROCOUNT
-    gather_nzcs_sb64(cm, &x->e_mbd);
+    if (bsize == BLOCK_SIZE_SB32X32) {
+      gather_nzcs_sb32(cm, &x->e_mbd);
+    } else {
+      gather_nzcs_sb64(cm, &x->e_mbd);
+    }
 #endif
-    vp9_tokenize_sb64(cpi, &x->e_mbd, t, !output_enabled);
+    vp9_tokenize_sb(cpi, &x->e_mbd, t, !output_enabled, bsize);
   } else {
     // FIXME(rbultje): not tile-aware (mi - 1)
     int mb_skip_context = cpi->common.mb_no_coeff_skip ?
@@ -2701,9 +2372,9 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
     mi->mbmi.mb_skip_coeff = 1;
     if (cm->mb_no_coeff_skip) {
       if (output_enabled)
         cpi->skip_true_count[mb_skip_context]++;
-      vp9_reset_sb64_tokens_context(xd);
+      vp9_reset_sb_tokens_context(xd, bsize);
     } else {
-      vp9_stuff_sb64(cpi, xd, t, !output_enabled);
+      vp9_stuff_sb(cpi, xd, t, !output_enabled, bsize);
       if (output_enabled)
         cpi->skip_false_count[mb_skip_context]++;
     }
@@ -2711,8 +2382,8 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
 
   // copy skip flag on all mb_mode_info contexts in this SB
   // if this was a skip at this txfm size
-  for (n = 1; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
+  for (n = 1; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
     if (mb_col + x_idx < cm->mb_cols && mb_row + y_idx < cm->mb_rows)
       mi[x_idx + y_idx * mis].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
   }
@@ -2721,12 +2392,20 @@ static void encode_superblock64(VP9_COMP *cpi, TOKENEXTRA **t,
     if (cm->txfm_mode == TX_MODE_SELECT &&
         !((cm->mb_no_coeff_skip && mi->mbmi.mb_skip_coeff) ||
           (vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP)))) {
-      cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
+      if (bsize >= BLOCK_SIZE_SB32X32) {
+        cpi->txfm_count_32x32p[mi->mbmi.txfm_size]++;
+      } else {
+        cpi->txfm_count_16x16p[mi->mbmi.txfm_size]++;
+      }
     } else {
       int x, y;
       TX_SIZE sz = (cm->txfm_mode == TX_MODE_SELECT) ? TX_32X32 : cm->txfm_mode;
-      for (y = 0; y < 4; y++) {
-        for (x = 0; x < 4; x++) {
+
+      if (sz == TX_32X32 && bsize < BLOCK_SIZE_SB32X32)
+        sz = TX_16X16;
+
+      for (y = 0; y < bh; y++) {
+        for (x = 0; x < bw; x++) {
          if (mb_col + x < cm->mb_cols && mb_row + y < cm->mb_rows) {
            mi[mis * y + x].mbmi.txfm_size = sz;
          }
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index f0c215d..75db660 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -106,14 +106,16 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
 }
 
 void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
-                          const uint8_t *pred, int dst_stride) {
+                          const uint8_t *pred, int dst_stride,
+                          BLOCK_SIZE_TYPE bsize) {
+  const int bh = 16 << mb_height_log2(bsize), bw = 16 << mb_width_log2(bsize);
   int r, c;
 
-  for (r = 0; r < 32; r++) {
-    for (c = 0; c < 32; c++)
+  for (r = 0; r < bh; r++) {
+    for (c = 0; c < bw; c++)
       diff[c] = src[c] - pred[c];
 
-    diff += 32;
+    diff += bw;
     pred += dst_stride;
     src += src_stride;
   }
@@ -122,69 +124,29 @@ void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride,
 void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc,
                            const uint8_t *vsrc, int src_stride,
                            const uint8_t *upred,
-                           const uint8_t *vpred, int dst_stride) {
-  int16_t *udiff = diff + 1024;
-  int16_t *vdiff = diff + 1024 + 256;
+                           const uint8_t *vpred, int dst_stride,
+                           BLOCK_SIZE_TYPE bsize) {
+  const int bhl = mb_height_log2(bsize), bwl = mb_width_log2(bsize);
+  const int uoff = (16 * 16) << (bhl + bwl), voff = (uoff * 5) >> 2;
+  const int bw = 8 << bwl, bh = 8 << bhl;
+  int16_t *udiff = diff + uoff;
+  int16_t *vdiff = diff + voff;
   int r, c;
 
-  for (r = 0; r < 16; r++) {
-    for (c = 0; c < 16; c++)
+  for (r = 0; r < bh; r++) {
+    for (c = 0; c < bw; c++)
       udiff[c] = usrc[c] - upred[c];
 
-    udiff += 16;
+    udiff += bw;
     upred += dst_stride;
     usrc += src_stride;
   }
 
-  for (r = 0; r < 16; r++) {
-    for (c = 0; c < 16; c++)
+  for (r = 0; r < bh; r++) {
+    for (c = 0; c < bw; c++)
       vdiff[c] = vsrc[c] - vpred[c];
 
-    vdiff += 16;
-    vpred += dst_stride;
-    vsrc += src_stride;
-  }
-}
-
-void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride,
-                            const uint8_t *pred, int dst_stride) {
-  int r, c;
-
-  for (r = 0; r < 64; r++) {
-    for (c = 0; c < 64; c++) {
-      diff[c] = src[c] - pred[c];
-    }
-
-    diff += 64;
-    pred += dst_stride;
-    src += src_stride;
-  }
-}
-
-void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc,
-                             const uint8_t *vsrc, int src_stride,
-                             const uint8_t *upred,
-                             const uint8_t *vpred, int dst_stride) {
-  int16_t *udiff = diff + 4096;
-  int16_t *vdiff = diff + 4096 + 1024;
-  int r, c;
-
-  for (r = 0; r < 32; r++) {
-    for (c = 0; c < 32; c++) {
-      udiff[c] = usrc[c] - upred[c];
-    }
-
-    udiff += 32;
-    upred += dst_stride;
-    usrc += src_stride;
-  }
-
-  for (r = 0; r < 32; r++) {
-    for (c = 0; c < 32; c++) {
-      vdiff[c] = vsrc[c] - vpred[c];
-    }
-
-    vdiff += 32;
+    vdiff += bw;
     vpred += dst_stride;
     vsrc += src_stride;
   }
@@ -288,164 +250,86 @@ void vp9_transform_mb_16x16(MACROBLOCK *x) {
   vp9_transform_mbuv_8x8(x);
 }
 
-void vp9_transform_sby_32x32(MACROBLOCK *x) {
-  vp9_short_fdct32x32(x->src_diff, x->coeff, 64);
-}
-
-void vp9_transform_sby_16x16(MACROBLOCK *x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int n;
-
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
-    const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
-
-    if (tx_type != DCT_DCT) {
-      vp9_short_fht16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
-                         x->coeff + n * 256, 32, tx_type);
-    } else {
-      x->fwd_txm16x16(x->src_diff + y_idx * 32 * 16 + x_idx * 16,
-                      x->coeff + n * 256, 64);
-    }
-  }
-}
-
-void vp9_transform_sby_8x8(MACROBLOCK *x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int n;
-
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
-    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
-
-    if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
-                       x->coeff + n * 64, 32, tx_type);
-    } else {
-      x->fwd_txm8x8(x->src_diff + y_idx * 32 * 8 + x_idx * 8,
-                    x->coeff + n * 64, 64);
-    }
-  }
-}
-
-void vp9_transform_sby_4x4(MACROBLOCK *x) {
-  MACROBLOCKD *const xd = &x->e_mbd;
-  int n;
-
-  for (n = 0; n < 64; n++) {
-    const int x_idx = n & 7, y_idx = n >> 3;
-    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
-
-    if (tx_type != DCT_DCT) {
-      vp9_short_fht4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
-                       x->coeff + n * 16, 32, tx_type);
-    } else {
-      x->fwd_txm4x4(x->src_diff + y_idx * 32 * 4 + x_idx * 4,
-                    x->coeff + n * 16, 64);
-    }
-  }
-}
-
-void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
-  vp9_clear_system_state();
-  x->fwd_txm16x16(x->src_diff + 1024, x->coeff + 1024, 32);
-  x->fwd_txm16x16(x->src_diff + 1280, x->coeff + 1280, 32);
-}
-
-void vp9_transform_sbuv_8x8(MACROBLOCK *x) {
-  int n;
-
-  vp9_clear_system_state();
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
-
-    x->fwd_txm8x8(x->src_diff + 1024 + y_idx * 16 * 8 + x_idx * 8,
-                  x->coeff + 1024 + n * 64, 32);
-    x->fwd_txm8x8(x->src_diff + 1280 + y_idx * 16 * 8 + x_idx * 8,
-                  x->coeff + 1280 + n * 64, 32);
-  }
-}
-
-void vp9_transform_sbuv_4x4(MACROBLOCK *x) {
-  int n;
-
-  vp9_clear_system_state();
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
-
-    x->fwd_txm4x4(x->src_diff + 1024 + y_idx * 16 * 4 + x_idx * 4,
-                  x->coeff + 1024 + n * 16, 32);
-    x->fwd_txm4x4(x->src_diff + 1280 + y_idx * 16 * 4 + x_idx * 4,
-                  x->coeff + 1280 + n * 16, 32);
-  }
-}
-
-void vp9_transform_sb64y_32x32(MACROBLOCK *x) {
+void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) - 1);
+  const int stride = 32 << bwl;
   int n;
 
-  for (n = 0; n < 4; n++) {
-    const int x_idx = n & 1, y_idx = n >> 1;
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
 
-    vp9_short_fdct32x32(x->src_diff + y_idx * 64 * 32 + x_idx * 32,
-                        x->coeff + n * 1024, 128);
+    vp9_short_fdct32x32(x->src_diff + y_idx * stride * 32 + x_idx * 32,
+                        x->coeff + n * 1024, stride * 2);
   }
 }
 
-void vp9_transform_sb64y_16x16(MACROBLOCK *x) {
+void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
+  const int bh = 1 << mb_height_log2(bsize);
+  const int stride = 16 << bwl, bstride = 4 << bwl;
   MACROBLOCKD *const xd = &x->e_mbd;
   int n;
 
-  for (n = 0; n < 16; n++) {
-    const int x_idx = n & 3, y_idx = n >> 2;
-    const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+    const TX_TYPE tx_type = get_tx_type_16x16(xd,
+                                              (y_idx * bstride + x_idx) * 4);
 
     if (tx_type != DCT_DCT) {
-      vp9_short_fht16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
-                         x->coeff + n * 256, 64, tx_type);
+      vp9_short_fht16x16(x->src_diff + y_idx * stride * 16 + x_idx * 16,
+                         x->coeff + n * 256, stride, tx_type);
    } else {
-      x->fwd_txm16x16(x->src_diff + y_idx * 64 * 16 + x_idx * 16,
-                      x->coeff + n * 256, 128);
+      x->fwd_txm16x16(x->src_diff + y_idx * stride * 16 + x_idx * 16,
+                      x->coeff + n * 256, stride * 2);
    }
  }
 }
 
-void vp9_transform_sb64y_8x8(MACROBLOCK *x) {
+void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) + 1);
+  const int stride = 8 << bwl, bstride = 2 << bwl;
   MACROBLOCKD *const xd = &x->e_mbd;
   int n;
 
-  for (n = 0; n < 64; n++) {
-    const int x_idx = n & 7, y_idx = n >> 3;
-    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+    const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * bstride + x_idx) * 2);
 
     if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
-                       x->coeff + n * 64, 64, tx_type);
+      vp9_short_fht8x8(x->src_diff + y_idx * stride * 8 + x_idx * 8,
+                       x->coeff + n * 64, stride, tx_type);
    } else {
-      x->fwd_txm8x8(x->src_diff + y_idx * 64 * 8 + x_idx * 8,
-                    x->coeff + n * 64, 128);
+      x->fwd_txm8x8(x->src_diff + y_idx * stride * 8 + x_idx * 8,
+                    x->coeff + n * 64, stride * 2);
    }
  }
 }
 
-void vp9_transform_sb64y_4x4(MACROBLOCK *x) {
+void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) + 2);
+  const int stride = 4 << bwl;
   MACROBLOCKD *const xd = &x->e_mbd;
   int n;
 
-  for (n = 0; n < 256; n++) {
-    const int x_idx = n & 15, y_idx = n >> 4;
-    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
+    const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
 
     if (tx_type != DCT_DCT) {
-      vp9_short_fht8x8(x->src_diff + y_idx * 64 * 4 + x_idx * 4,
-                       x->coeff + n * 16,
64, tx_type); + vp9_short_fht4x4(x->src_diff + y_idx * stride * 4 + x_idx * 4, + x->coeff + n * 16, stride, tx_type); } else { - x->fwd_txm4x4(x->src_diff + y_idx * 64 * 4 + x_idx * 4, - x->coeff + n * 16, 128); + x->fwd_txm4x4(x->src_diff + y_idx * stride * 4 + x_idx * 4, + x->coeff + n * 16, stride * 2); } } } -void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { +void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + assert(bsize == BLOCK_SIZE_SB64X64); vp9_clear_system_state(); vp9_short_fdct32x32(x->src_diff + 4096, x->coeff + 4096, 64); @@ -453,45 +337,57 @@ void vp9_transform_sb64uv_32x32(MACROBLOCK *x) { x->coeff + 4096 + 1024, 64); } -void vp9_transform_sb64uv_16x16(MACROBLOCK *x) { +void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); + const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2; + const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); + const int stride = 16 << (bwl - 1); int n; vp9_clear_system_state(); - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - x->fwd_txm16x16(x->src_diff + 4096 + y_idx * 32 * 16 + x_idx * 16, - x->coeff + 4096 + n * 256, 64); - x->fwd_txm16x16(x->src_diff + 4096 + 1024 + y_idx * 32 * 16 + x_idx * 16, - x->coeff + 4096 + 1024 + n * 256, 64); + x->fwd_txm16x16(x->src_diff + uoff + y_idx * stride * 16 + x_idx * 16, + x->coeff + uoff + n * 256, stride * 2); + x->fwd_txm16x16(x->src_diff + voff + y_idx * stride * 16 + x_idx * 16, + x->coeff + voff + n * 256, stride * 2); } } -void vp9_transform_sb64uv_8x8(MACROBLOCK *x) { +void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; + const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2; + const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); + const int stride = 8 << (bwl - 1); int n; vp9_clear_system_state(); - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - x->fwd_txm8x8(x->src_diff + 4096 + y_idx * 32 * 8 + x_idx * 8, - x->coeff + 4096 + n * 64, 64); - x->fwd_txm8x8(x->src_diff + 4096 + 1024 + y_idx * 32 * 8 + x_idx * 8, - x->coeff + 4096 + 1024 + n * 64, 64); + x->fwd_txm8x8(x->src_diff + uoff + y_idx * stride * 8 + x_idx * 8, + x->coeff + uoff + n * 64, stride * 2); + x->fwd_txm8x8(x->src_diff + voff + y_idx * stride * 8 + x_idx * 8, + x->coeff + voff + n * 64, stride * 2); } } -void vp9_transform_sb64uv_4x4(MACROBLOCK *x) { +void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; + const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2; + const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); + const int stride = 4 << (bwl - 1); int n; vp9_clear_system_state(); - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); - x->fwd_txm4x4(x->src_diff + 4096 + y_idx * 32 * 4 + x_idx * 4, - x->coeff + 4096 + n * 16, 64); - x->fwd_txm4x4(x->src_diff + 4096 + 1024 + y_idx * 32 * 4 + x_idx * 4, - x->coeff + 4096 + 1024 + n * 16, 64); + x->fwd_txm4x4(x->src_diff + uoff + y_idx * stride * 4 + x_idx * 4, + x->coeff + uoff + n * 16, stride * 2); + x->fwd_txm4x4(x->src_diff + voff + 
y_idx * stride * 4 + x_idx * 4, + x->coeff + voff + n * 16, stride * 2); } } @@ -968,252 +864,120 @@ static void optimize_mb_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { vp9_optimize_mbuv_8x8(cm, x); } -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta, tl; - - ta = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; - tl = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - optimize_b(cm, x, 0, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - &ta, &tl, TX_32X32, 64); -} - -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); +void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl; + const int bh = 1 << (mb_height_log2(bsize) - 1); ENTROPY_CONTEXT ta[2], tl[2]; int n; - ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; - ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; - tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; - - optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 64); - } -} - -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT ta[4], tl[4]; - int n; - - ta[0] = (a[0] + a[1]) != 0; - ta[1] = (a[2] + a[3]) != 0; - ta[2] = (a1[0] + a1[1]) != 0; - ta[3] = (a1[2] + a1[3]) != 0; - tl[0] = (l[0] + l[1]) != 0; - tl[1] = (l[2] + l[3]) != 0; - tl[2] = (l1[0] + l1[1]) != 0; - tl[3] = (l1[2] + l1[3]) != 0; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; - - optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 64); - } -} - -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT ta[8], tl[8]; - int n; - - vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; - - optimize_b(cm, x, n, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, 64); - } -} - -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; - int b; - - for (b = 64; b < 96; b += 16) { - const int 
cidx = b >= 80 ? 20 : 16; - a = ta + vp9_block2above_sb[TX_16X16][b]; - l = tl + vp9_block2left_sb[TX_16X16][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16, 64); - } -} - -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, above_ec, left_ec; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 64; b < 96; b += 4) { - const int cidx = b >= 80 ? 20 : 16; - a = ta + vp9_block2above_sb[TX_8X8][b]; - l = tl + vp9_block2left_sb[TX_8X8][b]; - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8, 64); - a[0] = a[1] = above_ec; - l[0] = l[1] = left_ec; + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT *a = + (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n * 2 + 0); + ENTROPY_CONTEXT *a1 = + (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n * 2 + 1); + ta[n] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; } -} - -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[2], t_left[2]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l; - int b; - - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 64; b < 96; b++) { - const int cidx = b >= 80 ? 
20 : 16; - a = ta + vp9_block2above_sb[TX_4X4][b]; - l = tl + vp9_block2left_sb[TX_4X4][b]; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4, 64); + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT *l = + (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n * 2); + ENTROPY_CONTEXT *l1 = + (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n * 2 + 1); + tl[n] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; } -} -void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); - ENTROPY_CONTEXT ta[2], tl[2]; - int n; - - ta[0] = (a[0] + a[1] + a[2] + a[3] + a1[0] + a1[1] + a1[2] + a1[3]) != 0; - ta[1] = (a2[0] + a2[1] + a2[2] + a2[3] + a3[0] + a3[1] + a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3] + l1[0] + l1[1] + l1[2] + l1[3]) != 0; - tl[1] = (l2[0] + l2[1] + l2[2] + l2[3] + l3[0] + l3[1] + l3[2] + l3[3]) != 0; - for (n = 0; n < 4; n++) { - const int x_idx = n & 1, y_idx = n >> 1; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n * 64, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_32X32, 256); + ta + x_idx, tl + y_idx, TX_32X32, 64 * bw * bh); } } -void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); +void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bw = 1 << bwl; + const int bh = 1 << mb_height_log2(bsize); ENTROPY_CONTEXT ta[4], tl[4]; int n; - ta[0] = (a[0] + a[1] + a[2] + a[3]) != 0; - ta[1] = (a1[0] + a1[1] + a1[2] + a1[3]) != 0; - ta[2] = (a2[0] + a2[1] + a2[2] + a2[3]) != 0; - ta[3] = (a3[0] + a3[1] + a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1] + l[2] + l[3]) != 0; - tl[1] = (l1[0] + l1[1] + l1[2] + l1[3]) != 0; - tl[2] = (l2[0] + l2[1] + l2[2] + l2[3]) != 0; - tl[3] = (l3[0] + l3[1] + l3[2] + l3[3]) != 0; - for (n = 0; n < 16; n++) { - const int x_idx = n & 3, y_idx = n >> 2; + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + n); + ta[n] = (a[0] + a[1] + a[2] + a[3]) != 0; + } + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + n); + tl[n] = (l[0] + l[1] + l[2] + l[3]) != 0; + } + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n * 16, PLANE_TYPE_Y_WITH_DC, 
x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_16X16, 256); + ta + x_idx, tl + y_idx, TX_16X16, 16 * bw * bh); } } -void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT *a = (ENTROPY_CONTEXT *) x->e_mbd.above_context; - ENTROPY_CONTEXT *a1 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 1); - ENTROPY_CONTEXT *a2 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 2); - ENTROPY_CONTEXT *a3 = (ENTROPY_CONTEXT *) (x->e_mbd.above_context + 3); - ENTROPY_CONTEXT *l = (ENTROPY_CONTEXT *) x->e_mbd.left_context; - ENTROPY_CONTEXT *l1 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 1); - ENTROPY_CONTEXT *l2 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 2); - ENTROPY_CONTEXT *l3 = (ENTROPY_CONTEXT *) (x->e_mbd.left_context + 3); +void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl; + const int bh = 2 << mb_height_log2(bsize); ENTROPY_CONTEXT ta[8], tl[8]; int n; - ta[0] = (a[0] + a[1]) != 0; - ta[1] = (a[2] + a[3]) != 0; - ta[2] = (a1[0] + a1[1]) != 0; - ta[3] = (a1[2] + a1[3]) != 0; - ta[4] = (a2[0] + a2[1]) != 0; - ta[5] = (a2[2] + a2[3]) != 0; - ta[6] = (a3[0] + a3[1]) != 0; - ta[7] = (a3[2] + a3[3]) != 0; - tl[0] = (l[0] + l[1]) != 0; - tl[1] = (l[2] + l[3]) != 0; - tl[2] = (l1[0] + l1[1]) != 0; - tl[3] = (l1[2] + l1[3]) != 0; - tl[4] = (l2[0] + l2[1]) != 0; - tl[5] = (l2[2] + l2[3]) != 0; - tl[6] = (l3[0] + l3[1]) != 0; - tl[7] = (l3[2] + l3[3]) != 0; - for (n = 0; n < 64; n++) { - const int x_idx = n & 7, y_idx = n >> 3; + for (n = 0; n < bw; n += 2) { + ENTROPY_CONTEXT *a = + (ENTROPY_CONTEXT *) (x->e_mbd.above_context + (n >> 1)); + ta[n + 0] = (a[0] + a[1]) != 0; + ta[n + 1] = (a[2] + a[3]) != 0; + } + for (n = 0; n < bh; n += 2) { + ENTROPY_CONTEXT *l = + (ENTROPY_CONTEXT *) (x->e_mbd.left_context + (n >> 1)); + tl[n + 0] = (l[0] + l[1]) != 0; + tl[n + 1] = (l[2] + l[3]) != 0; + } + + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n * 4, PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_8X8, 256); + ta + x_idx, tl + y_idx, TX_8X8, 4 * bw * bh); } } -void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { +void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + int bwl = mb_width_log2(bsize), bw = 1 << bwl; + int bh = 1 << mb_height_log2(bsize); ENTROPY_CONTEXT ta[16], tl[16]; int n; - vpx_memcpy(ta, x->e_mbd.above_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 4, x->e_mbd.above_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 8, x->e_mbd.above_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(ta + 12, x->e_mbd.above_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl, x->e_mbd.left_context, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 4, x->e_mbd.left_context + 1, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 8, x->e_mbd.left_context + 2, 4 * sizeof(ENTROPY_CONTEXT)); - vpx_memcpy(tl + 12, x->e_mbd.left_context + 3, 4 * sizeof(ENTROPY_CONTEXT)); - for (n = 0; n < 256; n++) { - const int x_idx = n & 15, y_idx = n >> 4; + for (n = 0; n < bw; n++) + vpx_memcpy(&ta[n * 4], x->e_mbd.above_context + n, + sizeof(ENTROPY_CONTEXT) * 4); + for (n = 0; n < bh; n++) + vpx_memcpy(&tl[n * 4], x->e_mbd.left_context + n, + sizeof(ENTROPY_CONTEXT) * 4); + bw *= 4; + bh *= 4; + bwl += 2; + + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; optimize_b(cm, x, n, 
PLANE_TYPE_Y_WITH_DC, x->e_mbd.block[0].dequant, - ta + x_idx, tl + y_idx, TX_4X4, 256); + ta + x_idx, tl + y_idx, TX_4X4, bh * bw); } } -void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { +void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) x->e_mbd.above_context; ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) x->e_mbd.left_context; ENTROPY_CONTEXT *a, *l, *a1, *l1, *a2, *l2, *a3, *l3, a_ec, l_ec; int b; + assert(bsize == BLOCK_SIZE_SB64X64); for (b = 256; b < 384; b += 64) { const int cidx = b >= 320 ? 20 : 16; a = ta + vp9_block2above_sb64[TX_32X32][b]; @@ -1231,67 +995,108 @@ void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { } } -void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, *a1, *l1, above_ec, left_ec; - int b; +void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + int uvoff = 16 << (bwl + bhl); + ENTROPY_CONTEXT ta[2][2], tl[2][2]; + int plane, n; + + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + n * 2; + ENTROPY_CONTEXT_PLANES *a1 = x->e_mbd.above_context + n * 2 + 1; + ta[0][n] = (a->u[0] + a->u[1] + a1->u[0] + a1->u[1]) != 0; + ta[1][n] = (a->v[0] + a->v[1] + a1->v[0] + a1->v[1]) != 0; + } + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT_PLANES *l = (x->e_mbd.left_context + n * 2); + ENTROPY_CONTEXT_PLANES *l1 = (x->e_mbd.left_context + n * 2 + 1); + tl[0][n] = (l->u[0] + l->u[1] + l1->u[0] + l1->u[1]) != 0; + tl[1][n] = (l->v[0] + l->v[1] + l1->v[0] + l1->v[1]) != 0; + } - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b += 16) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_16X16][b]; - l = tl + vp9_block2left_sb64[TX_16X16][b]; - a1 = a + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - l1 = l + sizeof(ENTROPY_CONTEXT_PLANES) / sizeof(ENTROPY_CONTEXT); - above_ec = (a[0] + a[1] + a1[0] + a1[1]) != 0; - left_ec = (l[0] + l[1] + l1[0] + l1[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_16X16, 256); - a[0] = a[1] = a1[0] = a1[1] = above_ec; - l[0] = l[1] = l1[0] = l1[1] = left_ec; + for (plane = 0; plane < 2; plane++) { + const int cidx = 16 + plane * 4; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); + optimize_b(cm, x, uvoff + n * 16, PLANE_TYPE_UV, + x->e_mbd.block[cidx].dequant, + &ta[plane][x_idx], &tl[plane][y_idx], + TX_16X16, bh * bw * 64); + } + uvoff = (uvoff * 5) >> 2; // switch u -> v } } -void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l, above_ec, left_ec; - int b; +void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + int uvoff = 4 << (bwl + bhl); + ENTROPY_CONTEXT ta[2][4], tl[2][4]; + int plane, n; + + for (n = 0; n < bw; n++) { + ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + n; + ta[0][n] = (a->u[0] + a->u[1]) != 0; + ta[1][n] = (a->v[0] + a->v[1]) != 0; + } + for (n = 0; n < bh; n++) { + ENTROPY_CONTEXT_PLANES *l = x->e_mbd.left_context + n; + tl[0][n] = (l->u[0] + l->u[1]) != 0; + tl[1][n] = (l->v[0] + l->v[1]) != 0; + } - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b += 4) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_8X8][b]; - l = tl + vp9_block2left_sb64[TX_8X8][b]; - above_ec = (a[0] + a[1]) != 0; - left_ec = (l[0] + l[1]) != 0; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - &above_ec, &left_ec, TX_8X8, 256); - a[0] = a[1] = above_ec; - l[0] = l[1] = left_ec; + for (plane = 0; plane < 2; plane++) { + const int cidx = 16 + plane * 4; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); + optimize_b(cm, x, uvoff + n * 4, PLANE_TYPE_UV, + x->e_mbd.block[cidx].dequant, + &ta[plane][x_idx], &tl[plane][y_idx], + TX_8X8, bh * bw * 16); + } + uvoff = (uvoff * 5) >> 2; // switch u -> v } } -void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { - ENTROPY_CONTEXT_PLANES t_above[4], t_left[4]; - ENTROPY_CONTEXT *ta = (ENTROPY_CONTEXT *) t_above; - ENTROPY_CONTEXT *tl = (ENTROPY_CONTEXT *) t_left; - ENTROPY_CONTEXT *a, *l; - int b; +void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + int uvoff = 1 << (bwl + bhl); + ENTROPY_CONTEXT ta[2][8], tl[2][8]; + int plane, n; + + for (n = 0; n < bw; n += 2) { + ENTROPY_CONTEXT_PLANES *a = x->e_mbd.above_context + (n >> 1); + ta[0][n + 0] = (a->u[0]) != 0; + ta[0][n + 1] = (a->u[1]) != 0; + ta[1][n + 0] = (a->v[0]) != 0; + ta[1][n + 1] = (a->v[1]) != 0; + } + for (n = 0; n < bh; n += 2) { + ENTROPY_CONTEXT_PLANES *l = x->e_mbd.left_context + (n >> 1); + tl[0][n + 0] = (l->u[0]) != 0; + tl[0][n + 1] = (l->u[1]) != 0; + tl[1][n + 0] = (l->v[0]) != 0; + tl[1][n + 1] = (l->v[1]) != 0; + } - vpx_memcpy(t_above, x->e_mbd.above_context, sizeof(t_above)); - vpx_memcpy(t_left, x->e_mbd.left_context, sizeof(t_left)); - for (b = 256; b < 384; b++) { - const int cidx = b >= 320 ? 
20 : 16; - a = ta + vp9_block2above_sb64[TX_4X4][b]; - l = tl + vp9_block2left_sb64[TX_4X4][b]; - optimize_b(cm, x, b, PLANE_TYPE_UV, x->e_mbd.block[cidx].dequant, - a, l, TX_4X4, 256); + for (plane = 0; plane < 2; plane++) { + const int cidx = 16 + plane * 4; + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); + optimize_b(cm, x, uvoff + n, PLANE_TYPE_UV, + x->e_mbd.block[cidx].dequant, + &ta[plane][x_idx], &tl[plane][y_idx], + TX_4X4, bh * bw * 4); + } + uvoff = (uvoff * 5) >> 2; // switch u -> v } } diff --git a/vp9/encoder/vp9_encodemb.h b/vp9/encoder/vp9_encodemb.h index 242afbe..f0c8ac5 100644 --- a/vp9/encoder/vp9_encodemb.h +++ b/vp9/encoder/vp9_encodemb.h @@ -44,37 +44,30 @@ void vp9_transform_mb_16x16(MACROBLOCK *mb); void vp9_transform_mby_16x16(MACROBLOCK *x); void vp9_optimize_mby_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_32x32(MACROBLOCK *x); -void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_16x16(MACROBLOCK *x); -void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_8x8(MACROBLOCK *x); -void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sby_4x4(MACROBLOCK *x); -void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sbuv_16x16(MACROBLOCK *x); -void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sbuv_8x8(MACROBLOCK *x); -void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sbuv_4x4(MACROBLOCK *x); -void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); - -void vp9_transform_sb64y_32x32(MACROBLOCK *x); -void vp9_optimize_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64y_16x16(MACROBLOCK *x); -void vp9_optimize_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64y_8x8(MACROBLOCK *x); -void vp9_optimize_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64y_4x4(MACROBLOCK *x); -void vp9_optimize_sb64y_4x4(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_32x32(MACROBLOCK *x); -void vp9_optimize_sb64uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_16x16(MACROBLOCK *x); -void vp9_optimize_sb64uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_8x8(MACROBLOCK *x); -void vp9_optimize_sb64uv_8x8(VP9_COMMON *const cm, MACROBLOCK *x); -void vp9_transform_sb64uv_4x4(MACROBLOCK *x); -void vp9_optimize_sb64uv_4x4(VP9_COMMON *const cm, MACROBLOCK *x); +void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE 
bsize); +void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); +void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_optimize_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, + BLOCK_SIZE_TYPE bsize); void vp9_fidct_mb(VP9_COMMON *const cm, MACROBLOCK *x); @@ -88,16 +81,12 @@ void vp9_subtract_mby_s_c(int16_t *diff, const uint8_t *src, int src_stride, const uint8_t *pred, int dst_stride); void vp9_subtract_sby_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride); + const uint8_t *pred, int dst_stride, + BLOCK_SIZE_TYPE bsize); void vp9_subtract_sbuv_s_c(int16_t *diff, const uint8_t *usrc, const uint8_t *vsrc, int src_stride, const uint8_t *upred, - const uint8_t *vpred, int dst_stride); -void vp9_subtract_sb64y_s_c(int16_t *diff, const uint8_t *src, int src_stride, - const uint8_t *pred, int dst_stride); -void vp9_subtract_sb64uv_s_c(int16_t *diff, const uint8_t *usrc, - const uint8_t *vsrc, int src_stride, - const uint8_t *upred, - const uint8_t *vpred, int dst_stride); + const uint8_t *vpred, int dst_stride, + BLOCK_SIZE_TYPE bsize); #endif // VP9_ENCODER_VP9_ENCODEMB_H_ diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 46e8a4a..7a8b251 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -496,128 +496,93 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) { vp9_default_zig_zag1d_32x32, 2); } -void vp9_quantize_sby_32x32(MACROBLOCK *x) { - vp9_regular_quantize_b_32x32(x, 0, 64); -} - -void vp9_quantize_sby_16x16(MACROBLOCK *x) { - int n; - - for (n = 0; n < 4; n++) { - TX_TYPE tx_type = get_tx_type_16x16(&x->e_mbd, - (16 * (n & 2)) + ((n & 1) * 4)); - x->quantize_b_16x16(x, n * 16, tx_type, 64); - } -} - -void vp9_quantize_sby_8x8(MACROBLOCK *x) { - int n; - - for (n = 0; n < 16; n++) { - TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd, - (4 * (n & 12)) + ((n & 3) * 2)); - x->quantize_b_8x8(x, n * 4, tx_type, 64); - } -} - -void vp9_quantize_sby_4x4(MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - int n; - - for (n = 0; n < 64; n++) { - const TX_TYPE tx_type = get_tx_type_4x4(xd, n); - if (tx_type != DCT_DCT) { - vp9_ht_quantize_b_4x4(x, n, tx_type); - } else { - x->quantize_b_4x4(x, n, 64); - } - } -} - -void vp9_quantize_sbuv_16x16(MACROBLOCK *x) { - x->quantize_b_16x16(x, 64, DCT_DCT, 64); - x->quantize_b_16x16(x, 80, DCT_DCT, 64); -} - -void vp9_quantize_sbuv_8x8(MACROBLOCK *x) { - int i; - - for (i = 64; i < 96; i += 4) - x->quantize_b_8x8(x, i, DCT_DCT, 64); -} - -void vp9_quantize_sbuv_4x4(MACROBLOCK *x) { - int i; - - for (i = 64; i < 96; i++) - x->quantize_b_4x4(x, i, 64); -} - -void vp9_quantize_sb64y_32x32(MACROBLOCK *x) { +void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bw = 1 << (mb_width_log2(bsize) - 1); + const int bh = 1 << (mb_height_log2(bsize) - 1); int n; - for (n = 0; n < 4; n++) - vp9_regular_quantize_b_32x32(x, n * 64, 256); + for (n = 0; n < bw * bh; n++) + vp9_regular_quantize_b_32x32(x, n * 64, bw * bh * 64); } -void vp9_quantize_sb64y_16x16(MACROBLOCK *x) { +void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize), bw = 1 << bwl; + const int bh = 1 << mb_height_log2(bsize); + const int bstride = 16 << bwl; int n; - for (n = 0; n < 16; n++) { + for (n = 0; n < bw * bh; n++) { + const int x_idx = n & (bw - 1), y_idx = n >> bwl; TX_TYPE tx_type 
= get_tx_type_16x16(&x->e_mbd,
-                                      (16 * (n & 12)) + ((n & 3) * 4));
-    x->quantize_b_16x16(x, n * 16, tx_type, 256);
+                                      4 * x_idx + y_idx * bstride);
+    x->quantize_b_16x16(x, n * 16, tx_type, 16 * bw * bh);
   }
 }
 
-void vp9_quantize_sb64y_8x8(MACROBLOCK *x) {
+void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 1, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) + 1);
+  const int bstride = 4 << bwl;
   int n;
 
-  for (n = 0; n < 64; n++) {
+  for (n = 0; n < bw * bh; n++) {
+    const int x_idx = n & (bw - 1), y_idx = n >> bwl;
     TX_TYPE tx_type = get_tx_type_8x8(&x->e_mbd,
-                                      (4 * (n & 56)) + ((n & 7) * 2));
-    x->quantize_b_8x8(x, n * 4, tx_type, 256);
+                                      2 * x_idx + y_idx * bstride);
+    x->quantize_b_8x8(x, n * 4, tx_type, 4 * bw * bh);
   }
 }
 
-void vp9_quantize_sb64y_4x4(MACROBLOCK *x) {
+void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) + 2, bw = 1 << bwl;
+  const int bh = 1 << (mb_height_log2(bsize) + 2);
   MACROBLOCKD *const xd = &x->e_mbd;
   int n;
 
-  for (n = 0; n < 256; n++) {
+  for (n = 0; n < bw * bh; n++) {
     const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
     if (tx_type != DCT_DCT) {
       vp9_ht_quantize_b_4x4(x, n, tx_type);
     } else {
-      x->quantize_b_4x4(x, n, 256);
+      x->quantize_b_4x4(x, n, bw * bh);
     }
   }
 }
 
-void vp9_quantize_sb64uv_32x32(MACROBLOCK *x) {
+void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  assert(bsize == BLOCK_SIZE_SB64X64);
   vp9_regular_quantize_b_32x32(x, 256, 256);
   vp9_regular_quantize_b_32x32(x, 320, 256);
 }
 
-void vp9_quantize_sb64uv_16x16(MACROBLOCK *x) {
+void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize);
+  const int bhl = mb_height_log2(bsize);
+  const int uoff = 16 << (bhl + bwl);
   int i;
 
-  for (i = 256; i < 384; i += 16)
-    x->quantize_b_16x16(x, i, DCT_DCT, 256);
+  for (i = uoff; i < ((uoff * 3) >> 1); i += 16)
+    x->quantize_b_16x16(x, i, DCT_DCT, uoff);
 }
 
-void vp9_quantize_sb64uv_8x8(MACROBLOCK *x) {
+void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize);
+  const int bhl = mb_height_log2(bsize);
+  const int uoff = 16 << (bhl + bwl);
   int i;
 
-  for (i = 256; i < 384; i += 4)
-    x->quantize_b_8x8(x, i, DCT_DCT, 256);
+  for (i = uoff; i < ((uoff * 3) >> 1); i += 4)
+    x->quantize_b_8x8(x, i, DCT_DCT, uoff);
 }
 
-void vp9_quantize_sb64uv_4x4(MACROBLOCK *x) {
+void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize);
+  const int bhl = mb_height_log2(bsize);
+  const int uoff = 16 << (bhl + bwl);
   int i;
 
-  for (i = 256; i < 384; i++)
-    x->quantize_b_4x4(x, i, 256);
+  for (i = uoff; i < ((uoff * 3) >> 1); i++)
+    x->quantize_b_4x4(x, i, uoff);
 }
 
 /* quantize_b_pair function pointer in MACROBLOCK structure is set to one of
diff --git a/vp9/encoder/vp9_quantize.h b/vp9/encoder/vp9_quantize.h
index 0f706a2..75bd6ed 100644
--- a/vp9/encoder/vp9_quantize.h
+++ b/vp9/encoder/vp9_quantize.h
@@ -49,22 +49,14 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x);
 void vp9_quantize_mb_16x16(MACROBLOCK *x);
 void vp9_quantize_mby_16x16(MACROBLOCK *x);
 
-void vp9_quantize_sby_32x32(MACROBLOCK *x);
-void vp9_quantize_sby_16x16(MACROBLOCK *x);
-void vp9_quantize_sby_8x8(MACROBLOCK *x);
-void vp9_quantize_sby_4x4(MACROBLOCK *x);
-void vp9_quantize_sbuv_16x16(MACROBLOCK *x);
-void vp9_quantize_sbuv_8x8(MACROBLOCK *x);
-void vp9_quantize_sbuv_4x4(MACROBLOCK *x);
-
-void vp9_quantize_sb64y_32x32(MACROBLOCK *x);
-void 
vp9_quantize_sb64y_16x16(MACROBLOCK *x); -void vp9_quantize_sb64y_8x8(MACROBLOCK *x); -void vp9_quantize_sb64y_4x4(MACROBLOCK *x); -void vp9_quantize_sb64uv_32x32(MACROBLOCK *x); -void vp9_quantize_sb64uv_16x16(MACROBLOCK *x); -void vp9_quantize_sb64uv_8x8(MACROBLOCK *x); -void vp9_quantize_sb64uv_4x4(MACROBLOCK *x); +void vp9_quantize_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); +void vp9_quantize_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize); struct VP9_COMP; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 35e204a..566fb60 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -907,12 +907,12 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_transform_sby_4x4(x); - vp9_quantize_sby_4x4(x); + vp9_transform_sby_4x4(x, BLOCK_SIZE_SB32X32); + vp9_quantize_sby_4x4(x, BLOCK_SIZE_SB32X32); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_4x4(cm, x); - *skippable = vp9_sby_is_skippable_4x4(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB32X32, TX_4X4); } static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -938,12 +938,12 @@ static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_sby_8x8(x); - vp9_quantize_sby_8x8(x); + vp9_transform_sby_8x8(x, BLOCK_SIZE_SB32X32); + vp9_quantize_sby_8x8(x, BLOCK_SIZE_SB32X32); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_8x8(cm, x); - *skippable = vp9_sby_is_skippable_8x8(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB32X32, TX_8X8); } static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -969,12 +969,12 @@ static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_sby_16x16(x); - vp9_quantize_sby_16x16(x); + vp9_transform_sby_16x16(x, BLOCK_SIZE_SB32X32); + vp9_quantize_sby_16x16(x, BLOCK_SIZE_SB32X32); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_16x16(cm, x); - *skippable = vp9_sby_is_skippable_16x16(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB32X32, TX_16X16); } static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -994,12 +994,12 @@ static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_32X32; - vp9_transform_sby_32x32(x); - vp9_quantize_sby_32x32(x); + vp9_transform_sby_32x32(x, BLOCK_SIZE_SB32X32); + vp9_quantize_sby_32x32(x, BLOCK_SIZE_SB32X32); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 0); *rate = rdcost_sby_32x32(cm, x); - *skippable = vp9_sby_is_skippable_32x32(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB32X32, TX_32X32); } static void super_block_yrd(VP9_COMP *cpi, @@ -1012,7 +1012,8 @@ static void 
super_block_yrd(VP9_COMP *cpi, const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, + BLOCK_SIZE_SB32X32); super_block_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); super_block_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); super_block_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); @@ -1045,12 +1046,12 @@ static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_4X4; - vp9_transform_sb64y_4x4(x); - vp9_quantize_sb64y_4x4(x); + vp9_transform_sby_4x4(x, BLOCK_SIZE_SB64X64); + vp9_quantize_sby_4x4(x, BLOCK_SIZE_SB64X64); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_4x4(cm, x); - *skippable = vp9_sb64y_is_skippable_4x4(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB64X64, TX_4X4); } static int rdcost_sb64y_8x8(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -1076,12 +1077,12 @@ static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_8X8; - vp9_transform_sb64y_8x8(x); - vp9_quantize_sb64y_8x8(x); + vp9_transform_sby_8x8(x, BLOCK_SIZE_SB64X64); + vp9_quantize_sby_8x8(x, BLOCK_SIZE_SB64X64); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_8x8(cm, x); - *skippable = vp9_sb64y_is_skippable_8x8(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB64X64, TX_8X8); } static int rdcost_sb64y_16x16(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -1108,12 +1109,12 @@ static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_16X16; - vp9_transform_sb64y_16x16(x); - vp9_quantize_sb64y_16x16(x); + vp9_transform_sby_16x16(x, BLOCK_SIZE_SB64X64); + vp9_quantize_sby_16x16(x, BLOCK_SIZE_SB64X64); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_16x16(cm, x); - *skippable = vp9_sb64y_is_skippable_16x16(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB64X64, TX_16X16); } static int rdcost_sb64y_32x32(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -1140,12 +1141,12 @@ static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_32X32; - vp9_transform_sb64y_32x32(x); - vp9_quantize_sb64y_32x32(x); + vp9_transform_sby_32x32(x, BLOCK_SIZE_SB64X64); + vp9_quantize_sby_32x32(x, BLOCK_SIZE_SB64X64); *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 0); *rate = rdcost_sb64y_32x32(cm, x); - *skippable = vp9_sb64y_is_skippable_32x32(xd); + *skippable = vp9_sby_is_skippable(xd, BLOCK_SIZE_SB64X64, TX_32X32); } static void super_block_64_yrd(VP9_COMP *cpi, @@ -1158,7 +1159,8 @@ static void super_block_64_yrd(VP9_COMP *cpi, const uint8_t *src = x->src.y_buffer, *dst = xd->dst.y_buffer; int src_y_stride = x->src.y_stride, dst_y_stride = xd->dst.y_stride; - vp9_subtract_sb64y_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride); + vp9_subtract_sby_s_c(x->src_diff, src, src_y_stride, dst, dst_y_stride, + BLOCK_SIZE_SB64X64); super_block64_yrd_32x32(cm, x, &r[TX_32X32][0], &d[TX_32X32], &s[TX_32X32]); 
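/* [Editorial sketch, not part of the patch] The rdopt changes above collapse
 * the per-size paths into one bsize parameter, and every merged helper then
 * shares the same raster-index arithmetic. A minimal, self-contained
 * illustration of that arithmetic is kept inside this comment so the hunk
 * stays intact. It assumes only that bwl/bhl are what the new
 * mb_width_log2()/mb_height_log2() helpers return; for_each_mb() and the
 * visit callback are hypothetical names used purely for illustration.
 *
 *   #include <stdio.h>
 *
 *   static void for_each_mb(int bwl, int bhl,
 *                           void (*visit)(int x_idx, int y_idx)) {
 *     const int bw = 1 << bwl, bh = 1 << bhl;
 *     int n;
 *     for (n = 0; n < bw * bh; n++)
 *       visit(n & (bw - 1), n >> bwl);  // column = low bits, row = high bits
 *   }
 *
 *   static void print_mb(int x_idx, int y_idx) {
 *     printf("16x16 block at (%d, %d)\n", x_idx, y_idx);
 *   }
 *
 *   int main(void) {
 *     for_each_mb(2, 2, print_mb);  // 64x64 superblock: a 4x4 grid of MBs
 *     return 0;
 *   }
 *
 * Because bw and bh are independent, the same loop serves 32x32, 64x64 and,
 * once the sbsegment experiment is functional, rectangular sizes.
 */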
super_block64_yrd_16x16(cm, x, &r[TX_16X16][0], &d[TX_16X16], &s[TX_16X16]); super_block64_yrd_8x8(cm, x, &r[TX_8X8][0], &d[TX_8X8], &s[TX_8X8]); @@ -1883,14 +1885,14 @@ static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int backup) { MACROBLOCKD *const xd = &x->e_mbd; - vp9_transform_sbuv_16x16(x); - vp9_quantize_sbuv_16x16(x); + vp9_transform_sbuv_16x16(x, BLOCK_SIZE_SB32X32); + vp9_quantize_sbuv_16x16(x, BLOCK_SIZE_SB32X32); *rate = rd_cost_sbuv_16x16(cm, x, backup); *distortion = vp9_sb_uv_block_error_c(x->coeff + 1024, xd->plane[1].dqcoeff, xd->plane[2].dqcoeff, 512, 2); - *skip = vp9_sbuv_is_skippable_16x16(xd); + *skip = vp9_sbuv_is_skippable(xd, BLOCK_SIZE_SB32X32, TX_16X16); } static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, @@ -1904,7 +1906,8 @@ static int64_t rd_inter32x32_uv(VP9_COMP *cpi, MACROBLOCK *x, int *rate, if (mbmi->txfm_size >= TX_16X16) { vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); + udst, vdst, dst_uv_stride, + BLOCK_SIZE_SB32X32); rd_inter32x32_uv_16x16(&cpi->common, x, rate, distortion, skip, 1); } else { int n, r = 0, d = 0; @@ -2078,7 +2081,8 @@ static void super_block_uvrd(VP9_COMMON *const cm, if (mbmi->txfm_size >= TX_16X16) { vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); + udst, vdst, dst_uv_stride, + BLOCK_SIZE_SB32X32); rd_inter32x32_uv_16x16(cm, x, rate, distortion, skippable, 1); } else { int d = 0, r = 0, n, s = 1; @@ -2160,14 +2164,14 @@ static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, int backup) { MACROBLOCKD *const xd = &x->e_mbd; - vp9_transform_sb64uv_32x32(x); - vp9_quantize_sb64uv_32x32(x); + vp9_transform_sbuv_32x32(x, BLOCK_SIZE_SB64X64); + vp9_quantize_sbuv_32x32(x, BLOCK_SIZE_SB64X64); *rate = rd_cost_sb64uv_32x32(cm, x, backup); *distortion = vp9_sb_uv_block_error_c(x->coeff + 4096, xd->plane[1].dqcoeff, xd->plane[2].dqcoeff, 2048, 0); - *skip = vp9_sb64uv_is_skippable_32x32(xd); + *skip = vp9_sbuv_is_skippable(xd, BLOCK_SIZE_SB64X64, TX_32X32); } static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, @@ -2189,8 +2193,9 @@ static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, memcpy(t_left, xd->left_context, sizeof(t_left)); if (mbmi->txfm_size == TX_32X32) { - vp9_subtract_sb64uv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, - udst, vdst, dst_uv_stride); + vp9_subtract_sbuv_s_c(x->src_diff, usrc, vsrc, src_uv_stride, + udst, vdst, dst_uv_stride, + BLOCK_SIZE_SB64X64); rd_inter64x64_uv_32x32(cm, x, &r, &d, &s, 1); } else if (mbmi->txfm_size == TX_16X16) { int n; @@ -2206,7 +2211,7 @@ static void super_block_64_uvrd(VP9_COMMON *const cm, MACROBLOCK *x, src_uv_stride, udst + x_idx * 16 + y_idx * 16 * dst_uv_stride, vdst + x_idx * 16 + y_idx * 16 * dst_uv_stride, - dst_uv_stride); + dst_uv_stride, BLOCK_SIZE_SB32X32); xd->above_context = t_above + x_idx * 2; xd->left_context = t_left + y_idx * 2; rd_inter32x32_uv_16x16(cm, x, &r_tmp, &d_tmp, &s_tmp, 0); diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 158a0bd..58b7412 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -395,120 +395,67 @@ static int mb_is_skippable_16x16(MACROBLOCKD *xd) { return (vp9_mby_is_skippable_16x16(xd) & vp9_mbuv_is_skippable_8x8(xd)); } -int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd) { - return (!xd->plane[0].eobs[0]); -} - -int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd) { - return (!xd->plane[1].eobs[0]) & 
(!xd->plane[2].eobs[0]); -} - -static int sb_is_skippable_32x32(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_32x32(xd) && - vp9_sbuv_is_skippable_16x16(xd); -} - -int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i += 16) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -static int sb_is_skippable_16x16(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_16x16(xd) & vp9_sbuv_is_skippable_16x16(xd); -} - -int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i += 4) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 16; i += 4) - skip &= (!xd->plane[1].eobs[i]); - for (i = 0; i < 16; i += 4) - skip &= (!xd->plane[2].eobs[i]); - - return skip; -} - -static int sb_is_skippable_8x8(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_8x8(xd) & vp9_sbuv_is_skippable_8x8(xd); -} - -int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd) { +int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + TX_SIZE sz) { + const int inc = 1 << (sz * 2); + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; int skip = 1; int i = 0; - for (i = 0; i < 64; i++) + for (i = 0; i < (1 << (bwl + bhl)); i += inc) skip &= (!xd->plane[0].eobs[i]); return skip; } -int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd) { +int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, TX_SIZE sz) { + const int inc = 1 << (sz * 2); + const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; int skip = 1; int i = 0; - for (i = 0; i < 16; i++) + for (i = 0; i < (1 << (bwl + bhl)); i += inc) { skip &= (!xd->plane[1].eobs[i]); - for (i = 0; i < 16; i++) skip &= (!xd->plane[2].eobs[i]); + } return skip; } -static int sb_is_skippable_4x4(MACROBLOCKD *xd) { - return vp9_sby_is_skippable_4x4(xd) & vp9_sbuv_is_skippable_4x4(xd); +static int sb_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, + TX_SIZE ysz, TX_SIZE uvsz) { + return vp9_sby_is_skippable(xd, bsize, ysz) & + vp9_sbuv_is_skippable(xd, bsize, uvsz); } void vp9_tokenize_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, - int dry_run) { + int dry_run, BLOCK_SIZE_TYPE bsize) { + const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; VP9_COMMON * const cm = &cpi->common; MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; TOKENEXTRA *t_backup = *t; const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); const int segment_id = mbmi->segment_id; const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); + const TX_SIZE txfm_size = mbmi->txfm_size; + const TX_SIZE uv_txfm_size = (bsize < BLOCK_SIZE_SB32X32 && + txfm_size == TX_16X16) ? TX_8X8 : + (bsize < BLOCK_SIZE_SB64X64 && + txfm_size == TX_32X32) ? 
TX_16X16 : txfm_size; int b; + const int n_y = (1 << (bwl + bhl)), n_uv = (n_y * 3) >> 1; - switch (mbmi->txfm_size) { - case TX_32X32: - mbmi->mb_skip_coeff = sb_is_skippable_32x32(xd); - break; - case TX_16X16: - mbmi->mb_skip_coeff = sb_is_skippable_16x16(xd); - break; - case TX_8X8: - mbmi->mb_skip_coeff = sb_is_skippable_8x8(xd); - break; - case TX_4X4: - mbmi->mb_skip_coeff = sb_is_skippable_4x4(xd); - break; - default: assert(0); - } + mbmi->mb_skip_coeff = sb_is_skippable(xd, bsize, txfm_size, uv_txfm_size); if (mbmi->mb_skip_coeff) { if (!dry_run) cpi->skip_true_count[mb_skip_context] += skip_inc; if (!cm->mb_no_coeff_skip) { - vp9_stuff_sb(cpi, xd, t, dry_run); + vp9_stuff_sb(cpi, xd, t, dry_run, bsize); } else { - vp9_reset_sb_tokens_context(xd); + vp9_reset_sb_tokens_context(xd, bsize); } if (dry_run) *t = t_backup; @@ -518,217 +465,52 @@ void vp9_tokenize_sb(VP9_COMP *cpi, if (!dry_run) cpi->skip_false_count[mb_skip_context] += skip_inc; - switch (mbmi->txfm_size) { + switch (txfm_size) { case TX_32X32: - tokenize_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, 64, dry_run); - for (b = 64; b < 96; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, 64, dry_run); - break; - case TX_16X16: - for (b = 0; b < 64; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_16X16, 64, dry_run); - for (b = 64; b < 96; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, 64, dry_run); - break; - case TX_8X8: - for (b = 0; b < 64; b += 4) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_8X8, 64, dry_run); - for (b = 64; b < 96; b += 4) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_8X8, 64, dry_run); - break; - case TX_4X4: - for (b = 0; b < 64; b++) + for (b = 0; b < n_y; b += 64) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_4X4, 64, dry_run); - for (b = 64; b < 96; b++) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_4X4, 64, dry_run); - break; - default: assert(0); - } - - if (dry_run) - *t = t_backup; -} - -int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i += 64) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd) { - return (!xd->plane[1].eobs[0]) & (!xd->plane[2].eobs[0]); -} - -static int sb64_is_skippable_32x32(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_32x32(xd) & vp9_sb64uv_is_skippable_32x32(xd); -} - -int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i += 16) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i += 16) - skip &= (!xd->plane[1].eobs[i]); - for (i = 0; i < 64; i += 16) - skip &= (!xd->plane[2].eobs[i]); - - return skip; -} - -static int sb64_is_skippable_16x16(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_16x16(xd) & vp9_sb64uv_is_skippable_16x16(xd); -} - -int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i += 4) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i += 4) - skip &= (!xd->plane[1].eobs[i]); - for (i = 0; i < 64; i += 4) - skip &= (!xd->plane[2].eobs[i]); - - return skip; -} - -static int sb64_is_skippable_8x8(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_8x8(xd) & vp9_sb64uv_is_skippable_8x8(xd); -} - -int 
vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 256; i++) - skip &= (!xd->plane[0].eobs[i]); - - return skip; -} - -int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd) { - int skip = 1; - int i = 0; - - for (i = 0; i < 64; i++) - skip &= (!xd->plane[1].eobs[i]); - for (i = 0; i < 64; i++) - skip &= (!xd->plane[2].eobs[i]); - - return skip; -} - -static int sb64_is_skippable_4x4(MACROBLOCKD *xd) { - return vp9_sb64y_is_skippable_4x4(xd) & vp9_sb64uv_is_skippable_4x4(xd); -} - -void vp9_tokenize_sb64(VP9_COMP *cpi, - MACROBLOCKD *xd, - TOKENEXTRA **t, - int dry_run) { - VP9_COMMON * const cm = &cpi->common; - MB_MODE_INFO * const mbmi = &xd->mode_info_context->mbmi; - TOKENEXTRA *t_backup = *t; - const int mb_skip_context = vp9_get_pred_context(cm, xd, PRED_MBSKIP); - const int segment_id = mbmi->segment_id; - const int skip_inc = !vp9_segfeature_active(xd, segment_id, SEG_LVL_SKIP); - int b; - - switch (mbmi->txfm_size) { - case TX_32X32: - mbmi->mb_skip_coeff = sb64_is_skippable_32x32(xd); - break; - case TX_16X16: - mbmi->mb_skip_coeff = sb64_is_skippable_16x16(xd); - break; - case TX_8X8: - mbmi->mb_skip_coeff = sb64_is_skippable_8x8(xd); - break; - case TX_4X4: - mbmi->mb_skip_coeff = sb64_is_skippable_4x4(xd); - break; - default: assert(0); - } - - if (mbmi->mb_skip_coeff) { - if (!dry_run) - cpi->skip_true_count[mb_skip_context] += skip_inc; - if (!cm->mb_no_coeff_skip) { - vp9_stuff_sb64(cpi, xd, t, dry_run); - } else { - vp9_reset_sb64_tokens_context(xd); - } - if (dry_run) - *t = t_backup; - return; - } - - if (!dry_run) - cpi->skip_false_count[mb_skip_context] += skip_inc; - - switch (mbmi->txfm_size) { - case TX_32X32: - for (b = 0; b < 256; b += 64) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_32X32, 256, dry_run); - for (b = 256; b < 384; b += 64) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_32X32, 256, dry_run); + TX_32X32, n_y, dry_run); + if (uv_txfm_size == TX_32X32) { + assert(bsize == BLOCK_SIZE_SB64X64); + tokenize_b(cpi, xd, 256, t, PLANE_TYPE_UV, + TX_32X32, n_y, dry_run); + tokenize_b(cpi, xd, 320, t, PLANE_TYPE_UV, + TX_32X32, n_y, dry_run); + } else { + for (; b < n_uv; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, n_y, dry_run); + } break; case TX_16X16: - for (b = 0; b < 256; b += 16) + for (b = 0; b < n_y; b += 16) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_16X16, 256, dry_run); - for (b = 256; b < 384; b += 16) - tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_16X16, 256, dry_run); + TX_16X16, n_y, dry_run); + if (uv_txfm_size == TX_16X16) { + for (; b < n_uv; b += 16) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_16X16, n_y, dry_run); + } else { + for (; b < n_uv; b += 4) + tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, + TX_8X8, n_y, dry_run); + } break; case TX_8X8: - for (b = 0; b < 256; b += 4) + for (b = 0; b < n_y; b += 4) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_8X8, 256, dry_run); - for (b = 256; b < 384; b += 4) + TX_8X8, n_y, dry_run); + for (; b < n_uv; b += 4) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_8X8, 256, dry_run); + TX_8X8, n_y, dry_run); break; case TX_4X4: - for (b = 0; b < 256; b++) + for (b = 0; b < n_y; b++) tokenize_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, - TX_4X4, 256, dry_run); - for (b = 256; b < 384; b++) + TX_4X4, n_y, dry_run); + for (; b < n_uv; b++) tokenize_b(cpi, xd, b, t, PLANE_TYPE_UV, - TX_4X4, 256, dry_run); + TX_4X4, n_y, dry_run); break; default: assert(0); } @@ -1174,70 +956,53 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD 
@@ -1174,70 +956,53 @@ void vp9_stuff_mb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
   }
 }
 
-void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run) {
-  TOKENEXTRA * const t_backup = *t;
+void vp9_stuff_sb(VP9_COMP *cpi, MACROBLOCKD *xd, TOKENEXTRA **t, int dry_run,
+                  BLOCK_SIZE_TYPE bsize) {
+  MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
+  const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2;
+  const TX_SIZE txfm_size = mbmi->txfm_size;
+  const TX_SIZE uv_txfm_size = (bsize < BLOCK_SIZE_SB32X32 &&
+                                txfm_size == TX_16X16) ? TX_8X8 :
+                               (bsize < BLOCK_SIZE_SB64X64 &&
+                                txfm_size == TX_32X32) ? TX_16X16 : txfm_size;
   int b;
-
-  switch (xd->mode_info_context->mbmi.txfm_size) {
-    case TX_32X32:
-      stuff_b(cpi, xd, 0, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
-      for (b = 64; b < 96; b += 16)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
-      break;
-    case TX_16X16:
-      for (b = 0; b < 64; b += 16)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
-      for (b = 64; b < 96; b += 16)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
-      break;
-    case TX_8X8:
-      for (b = 0; b < 64; b += 4)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
-      for (b = 64; b < 96; b += 4)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
-      break;
-    case TX_4X4:
-      for (b = 0; b < 64; b++)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
-      for (b = 64; b < 96; b++)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
-      break;
-    default: assert(0);
-  }
-
-  if (dry_run) {
-    *t = t_backup;
-  }
-}
-
-void vp9_stuff_sb64(VP9_COMP *cpi, MACROBLOCKD *xd,
-                    TOKENEXTRA **t, int dry_run) {
+  const int n_y = (1 << (bwl + bhl)), n_uv = (n_y * 3) >> 1;
   TOKENEXTRA * const t_backup = *t;
-  int b;
 
-  switch (xd->mode_info_context->mbmi.txfm_size) {
+  switch (txfm_size) {
     case TX_32X32:
-      for (b = 0; b < 256; b += 64)
+      for (b = 0; b < n_y; b += 64)
         stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_32X32, dry_run);
-      for (b = 256; b < 384; b += 64)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_32X32, dry_run);
+      if (uv_txfm_size == TX_32X32) {
+        assert(bsize == BLOCK_SIZE_SB64X64);
+        stuff_b(cpi, xd, 256, t, PLANE_TYPE_UV, TX_32X32, dry_run);
+        stuff_b(cpi, xd, 320, t, PLANE_TYPE_UV, TX_32X32, dry_run);
+      } else {
+        for (; b < n_uv; b += 16)
+          stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+      }
       break;
     case TX_16X16:
-      for (b = 0; b < 256; b += 16)
+      for (b = 0; b < n_y; b += 16)
         stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_16X16, dry_run);
-      for (b = 256; b < 384; b += 16)
-        stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+      if (uv_txfm_size == TX_16X16) {
+        for (; b < n_uv; b += 16)
+          stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_16X16, dry_run);
+      } else {
+        for (; b < n_uv; b += 4)
+          stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
+      }
       break;
     case TX_8X8:
-      for (b = 0; b < 256; b += 4)
+      for (b = 0; b < n_y; b += 4)
         stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_8X8, dry_run);
-      for (b = 256; b < 384; b += 4)
+      for (; b < n_uv; b += 4)
         stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_8X8, dry_run);
       break;
     case TX_4X4:
-      for (b = 0; b < 256; b++)
+      for (b = 0; b < n_y; b++)
         stuff_b(cpi, xd, b, t, PLANE_TYPE_Y_WITH_DC, TX_4X4, dry_run);
-      for (b = 256; b < 384; b++)
+      for (; b < n_uv; b++)
         stuff_b(cpi, xd, b, t, PLANE_TYPE_UV, TX_4X4, dry_run);
       break;
     default: assert(0);
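
Note: vp9_stuff_sb now derives the chroma transform size instead of
hard-coding it per block size. With 4:2:0 sampling each chroma plane has
half the luma width and height, so a transform spanning the whole luma
block cannot fit the chroma planes below a given block size: TX_32X32 luma
in a 32x32 block leaves only 16x16 of chroma, hence the TX_16X16 fallback.
The uv_txfm_size ternary above is equivalent to this helper (the function
name is invented for illustration; the patch inlines the expression):

    /* Assumes the TX_SIZE and BLOCK_SIZE_TYPE enums from the vp9 headers. */
    static TX_SIZE get_uv_txfm_size(BLOCK_SIZE_TYPE bsize, TX_SIZE txfm_size) {
      if (bsize < BLOCK_SIZE_SB32X32 && txfm_size == TX_16X16)
        return TX_8X8;    /* 16x16 block: chroma planes are 8x8   */
      if (bsize < BLOCK_SIZE_SB64X64 && txfm_size == TX_32X32)
        return TX_16X16;  /* 32x32 block: chroma planes are 16x16 */
      return txfm_size;
    }
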
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index 907f814..decb34a 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -36,37 +36,21 @@ int vp9_mbuv_is_skippable_4x4(MACROBLOCKD *xd);
 int vp9_mby_is_skippable_8x8(MACROBLOCKD *xd);
 int vp9_mbuv_is_skippable_8x8(MACROBLOCKD *xd);
 int vp9_mby_is_skippable_16x16(MACROBLOCKD *xd);
-int vp9_sby_is_skippable_32x32(MACROBLOCKD *xd);
-int vp9_sby_is_skippable_16x16(MACROBLOCKD *xd);
-int vp9_sby_is_skippable_8x8(MACROBLOCKD *xd);
-int vp9_sby_is_skippable_4x4(MACROBLOCKD *xd);
-int vp9_sbuv_is_skippable_16x16(MACROBLOCKD *xd);
-int vp9_sbuv_is_skippable_8x8(MACROBLOCKD *xd);
-int vp9_sbuv_is_skippable_4x4(MACROBLOCKD *xd);
-int vp9_sb64y_is_skippable_32x32(MACROBLOCKD *xd);
-int vp9_sb64y_is_skippable_16x16(MACROBLOCKD *xd);
-int vp9_sb64y_is_skippable_8x8(MACROBLOCKD *xd);
-int vp9_sb64y_is_skippable_4x4(MACROBLOCKD *xd);
-int vp9_sb64uv_is_skippable_32x32(MACROBLOCKD *xd);
-int vp9_sb64uv_is_skippable_16x16(MACROBLOCKD *xd);
-int vp9_sb64uv_is_skippable_8x8(MACROBLOCKD *xd);
-int vp9_sb64uv_is_skippable_4x4(MACROBLOCKD *xd);
+
+int vp9_sby_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, TX_SIZE sz);
+int vp9_sbuv_is_skippable(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize, TX_SIZE sz);
 
 struct VP9_COMP;
 
 void vp9_tokenize_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
                      TOKENEXTRA **t, int dry_run);
 void vp9_tokenize_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
-                     TOKENEXTRA **t, int dry_run);
-void vp9_tokenize_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd,
-                       TOKENEXTRA **t, int dry_run);
+                     TOKENEXTRA **t, int dry_run, BLOCK_SIZE_TYPE bsize);
 
 void vp9_stuff_mb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
                   TOKENEXTRA **t, int dry_run);
 void vp9_stuff_sb(struct VP9_COMP *cpi, MACROBLOCKD *xd,
-                  TOKENEXTRA **t, int dry_run);
-void vp9_stuff_sb64(struct VP9_COMP *cpi, MACROBLOCKD *xd,
-                    TOKENEXTRA **t, int dry_run);
+                  TOKENEXTRA **t, int dry_run, BLOCK_SIZE_TYPE bsize);
 
 #ifdef ENTROPY_STATS
 void init_context_counters();
diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk
index 5e1ff62..8abd583 100644
--- a/vp9/vp9_common.mk
+++ b/vp9/vp9_common.mk
@@ -36,6 +36,7 @@ VP9_COMMON_SRCS-yes += common/vp9_common.h
 VP9_COMMON_SRCS-yes += common/vp9_entropy.h
 VP9_COMMON_SRCS-yes += common/vp9_entropymode.h
 VP9_COMMON_SRCS-yes += common/vp9_entropymv.h
+VP9_COMMON_SRCS-yes += common/vp9_enums.h
 VP9_COMMON_SRCS-yes += common/vp9_extend.h
 VP9_COMMON_SRCS-yes += common/vp9_findnearmv.h
 VP9_COMMON_SRCS-yes += common/vp9_header.h
-- 
2.7.4