From adaec4d0fbb4b9b0cbcd06b82d3f24cc2ce4b332 Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Fri, 5 Sep 2014 16:58:24 -0700 Subject: [PATCH] Speeding up decode_coeffs(). 1080p decoding speedup -- 1.25%, 4K decoding speedup -- 2.5%. Change-Id: I5f02f521cbf7758d4d1886a93bc5b074abdff03d --- vp9/decoder/vp9_detokenize.c | 150 +++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 91 deletions(-) diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index 76ca1ae..df46f64 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -13,6 +13,7 @@ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" #include "vp9/decoder/vp9_detokenize.h" @@ -31,29 +32,31 @@ #define INCREMENT_COUNT(token) \ do { \ if (!cm->frame_parallel_decoding_mode) \ - ++coef_counts[band][ctx][token]; \ + ++coef_counts[band][ctx][token]; \ } while (0) -#define WRITE_COEF_CONTINUE(val, token) \ - { \ - v = (val * dqv) >> dq_shift; \ - dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v; \ - token_cache[scan[c]] = vp9_pt_energy_class[token]; \ - ++c; \ - ctx = get_coef_context(nb, token_cache, c); \ - dqv = dq[1]; \ - continue; \ - } +static INLINE int read_coeff(const vp9_prob *probs, int n, vp9_reader *r) { + int i, val = 0; + for (i = 0; i < n; ++i) + val = (val << 1) | vp9_read(r, probs[i]); + return val; +} -#define ADJUST_COEF(prob, bits_count) \ - do { \ - val += (vp9_read(r, prob) << bits_count); \ - } while (0) +static const vp9_tree_index coeff_subtree_high[TREE_SIZE(ENTROPY_TOKENS)] = { + 2, 6, /* 0 = LOW_VAL */ + -TWO_TOKEN, 4, /* 1 = TWO */ + -THREE_TOKEN, -FOUR_TOKEN, /* 2 = THREE */ + 8, 10, /* 3 = HIGH_LOW */ + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, /* 4 = CAT_ONE */ + 12, 14, /* 5 = CAT_THREEFOUR */ + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, /* 6 = CAT_THREE */ + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN /* 7 = CAT_FIVE */ +}; static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, - tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, - int ctx, const int16_t *scan, const int16_t *nb, - vp9_reader *r) { + tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, + int ctx, const int16_t *scan, const int16_t *nb, + vp9_reader *r) { const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; @@ -69,11 +72,11 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, uint8_t token_cache[32 * 32]; const uint8_t *band_translate = get_band_translate(tx_size); const int dq_shift = (tx_size == TX_32X32); - int v; + int v, token; int16_t dqv = dq[0]; while (c < max_eob) { - int val; + int val = -1; band = *band_translate++; prob = coef_probs[band][ctx]; if (!cm->frame_parallel_decoding_mode) @@ -95,81 +98,46 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, PLANE_TYPE type, prob = coef_probs[band][ctx]; } - // ONE_CONTEXT_NODE_0_ if (!vp9_read(r, prob[ONE_CONTEXT_NODE])) { INCREMENT_COUNT(ONE_TOKEN); - WRITE_COEF_CONTINUE(1, ONE_TOKEN); - } - - INCREMENT_COUNT(TWO_TOKEN); - - prob = vp9_pareto8_full[prob[PIVOT_NODE] - 1]; - - if (!vp9_read(r, prob[LOW_VAL_CONTEXT_NODE])) { - if (!vp9_read(r, prob[TWO_CONTEXT_NODE])) { - WRITE_COEF_CONTINUE(2, TWO_TOKEN); + token = ONE_TOKEN; + val = 1; + } else { + INCREMENT_COUNT(TWO_TOKEN); + token = vp9_read_tree(r, coeff_subtree_high, + vp9_pareto8_full[prob[PIVOT_NODE] - 1]); + switch (token) { + case TWO_TOKEN: + case THREE_TOKEN: + case FOUR_TOKEN: + val = token; + break; + case CATEGORY1_TOKEN: + val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r); + break; + case CATEGORY2_TOKEN: + val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r); + break; + case CATEGORY3_TOKEN: + val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r); + break; + case CATEGORY4_TOKEN: + val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r); + break; + case CATEGORY5_TOKEN: + val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r); + break; + case CATEGORY6_TOKEN: + val = CAT6_MIN_VAL + read_coeff(vp9_cat6_prob, 14, r); + break; } - if (!vp9_read(r, prob[THREE_CONTEXT_NODE])) { - WRITE_COEF_CONTINUE(3, THREE_TOKEN); - } - WRITE_COEF_CONTINUE(4, FOUR_TOKEN); } - - if (!vp9_read(r, prob[HIGH_LOW_CONTEXT_NODE])) { - if (!vp9_read(r, prob[CAT_ONE_CONTEXT_NODE])) { - val = CAT1_MIN_VAL; - ADJUST_COEF(vp9_cat1_prob[0], 0); - WRITE_COEF_CONTINUE(val, CATEGORY1_TOKEN); - } - val = CAT2_MIN_VAL; - ADJUST_COEF(vp9_cat2_prob[0], 1); - ADJUST_COEF(vp9_cat2_prob[1], 0); - WRITE_COEF_CONTINUE(val, CATEGORY2_TOKEN); - } - - if (!vp9_read(r, prob[CAT_THREEFOUR_CONTEXT_NODE])) { - if (!vp9_read(r, prob[CAT_THREE_CONTEXT_NODE])) { - val = CAT3_MIN_VAL; - ADJUST_COEF(vp9_cat3_prob[0], 2); - ADJUST_COEF(vp9_cat3_prob[1], 1); - ADJUST_COEF(vp9_cat3_prob[2], 0); - WRITE_COEF_CONTINUE(val, CATEGORY3_TOKEN); - } - val = CAT4_MIN_VAL; - ADJUST_COEF(vp9_cat4_prob[0], 3); - ADJUST_COEF(vp9_cat4_prob[1], 2); - ADJUST_COEF(vp9_cat4_prob[2], 1); - ADJUST_COEF(vp9_cat4_prob[3], 0); - WRITE_COEF_CONTINUE(val, CATEGORY4_TOKEN); - } - - if (!vp9_read(r, prob[CAT_FIVE_CONTEXT_NODE])) { - val = CAT5_MIN_VAL; - ADJUST_COEF(vp9_cat5_prob[0], 4); - ADJUST_COEF(vp9_cat5_prob[1], 3); - ADJUST_COEF(vp9_cat5_prob[2], 2); - ADJUST_COEF(vp9_cat5_prob[3], 1); - ADJUST_COEF(vp9_cat5_prob[4], 0); - WRITE_COEF_CONTINUE(val, CATEGORY5_TOKEN); - } - val = 0; - val = (val << 1) | vp9_read(r, vp9_cat6_prob[0]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[1]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[2]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[3]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[4]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[5]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[6]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[7]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[8]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[9]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[10]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[11]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[12]); - val = (val << 1) | vp9_read(r, vp9_cat6_prob[13]); - val += CAT6_MIN_VAL; - - WRITE_COEF_CONTINUE(val, CATEGORY6_TOKEN); + v = (val * dqv) >> dq_shift; + dqcoeff[scan[c]] = vp9_read_bit(r) ? -v : v; + token_cache[scan[c]] = vp9_pt_energy_class[token]; + ++c; + ctx = get_coef_context(nb, token_cache, c); + dqv = dq[1]; } return c; -- 2.7.4