From 3a6ec9ea72b7cc85f7dceb0753ee2d665ee2ef8c Mon Sep 17 00:00:00 2001
From: Alex Converse
Date: Thu, 16 Mar 2017 16:34:26 -0700
Subject: [PATCH] vp9_optimize_b: Combine extrabits cost with token lookup

About 0.6% fewer cycles spent in vp9_optimize_b.

Change-Id: I2ae62a78374c594ed81d4e3100a5848e2f6f2c4e
---
 vp9/encoder/vp9_encodemb.c |  9 +++------
 vp9/encoder/vp9_tokenize.h | 22 ++++++++++++++++------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 1cf0b02..cf92dba 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -108,7 +108,6 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
   int rate0, rate1;
   int64_t error0, error1;
   int16_t t0, t1;
-  EXTRABIT e0;
   unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
       mb->token_costs[tx_size][type][ref];
   int best, band, pt, i, final_eob;
@@ -144,7 +143,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
       /* Evaluate the first possibility for this state. */
       rate0 = tokens[next][0].rate;
       rate1 = tokens[next][1].rate;
-      vp9_get_token_extra(x, &t0, &e0);
+      vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits);
       /* Consider both possible successor states. */
       if (next < default_eob) {
         band = band_translate[i + 1];
@@ -155,7 +154,6 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
       UPDATE_RD_COST();
       /* And pick the best. */
       best = rd_cost1 < rd_cost0;
-      base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
       dx = (dqcoeff[rc] - coeff[rc]) * (1 << shift);
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -193,9 +191,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
          */
         t0 = tokens[next][0].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
         t1 = tokens[next][1].token == EOB_TOKEN ? EOB_TOKEN : ZERO_TOKEN;
-        e0 = 0;
+        base_bits = 0;
       } else {
-        vp9_get_token_extra(x, &t0, &e0);
+        vp9_get_token_extracost(cat6_high_cost, x, &t0, &base_bits);
         t1 = t0;
       }
       if (next < default_eob) {
@@ -213,7 +211,6 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
       UPDATE_RD_COST();
       /* And pick the best. */
       best = rd_cost1 < rd_cost0;
-      base_bits = vp9_get_cost(t0, e0, cat6_high_cost);
 
 #if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h
index c7694af..4b8d7ad 100644
--- a/vp9/encoder/vp9_tokenize.h
+++ b/vp9/encoder/vp9_tokenize.h
@@ -79,12 +79,22 @@ extern const int16_t vp9_cat6_low_cost[256];
 extern const uint16_t vp9_cat6_high_cost[64];
 extern const uint16_t vp9_cat6_high10_high_cost[256];
 extern const uint16_t vp9_cat6_high12_high_cost[1024];
-static INLINE int vp9_get_cost(int16_t token, EXTRABIT extrabits,
-                               const uint16_t *cat6_high_table) {
-  if (token != CATEGORY6_TOKEN)
-    return vp9_extra_bits[token].cost[extrabits >> 1];
-  return vp9_cat6_low_cost[(extrabits >> 1) & 0xff] +
-         cat6_high_table[extrabits >> 9];
+
+static INLINE void vp9_get_token_extracost(const uint16_t *cat6_high_table,
+                                           int v, int16_t *token,
+                                           int *extracost) {
+  EXTRABIT extrabits; // unsigned extrabits
+  v = abs(v);
+  if (v >= CAT6_MIN_VAL) {
+    *token = CATEGORY6_TOKEN;
+    extrabits = v - CAT6_MIN_VAL;
+    *extracost =
+        vp9_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8];
+  } else {
+    *token = vp9_dct_cat_lt_10_value_tokens[v].token;
+    extrabits = vp9_dct_cat_lt_10_value_tokens[v].extra >> 1;
+    *extracost = vp9_extra_bits[*token].cost[extrabits];
+  }
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-- 
2.7.4