Some minor optimizations for cost_coeffs().

author Ronald S. Bultje <rbultje@google.com>

Fri, 28 Jun 2013 03:57:37 +0000 (20:57 -0700)

committer Ronald S. Bultje <rbultje@google.com>

Fri, 28 Jun 2013 17:29:02 +0000 (10:29 -0700)
author Ronald S. Bultje <rbultje@google.com>
Fri, 28 Jun 2013 03:57:37 +0000 (20:57 -0700)
committer Ronald S. Bultje <rbultje@google.com>
Fri, 28 Jun 2013 17:29:02 +0000 (10:29 -0700)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h

index 74f61a1..1e7aad7 100644 (file)
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -133,8 +133,7 @@ struct macroblock {
    unsigned char *active_ptr;
  
    // note that token_costs is the cost when eob node is skipped
-  vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES];
-  vp9_coeff_count token_costs_noskip[TX_SIZE_MAX_SB][BLOCK_TYPES];
+  vp9_coeff_count token_costs[TX_SIZE_MAX_SB][BLOCK_TYPES][2];
  
    int optimize;
  
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c

index e13ffbd..4cfdff3 100644 (file)
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -223,11 +223,11 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
          pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                         pad, default_eob);
          rate0 +=
-          mb->token_costs_noskip[tx_size][type][ref][band][pt]
-                                [tokens[next][0].token];
+          mb->token_costs[tx_size][type][ref][0][band][pt]
+                         [tokens[next][0].token];
          rate1 +=
-          mb->token_costs_noskip[tx_size][type][ref][band][pt]
-                                [tokens[next][1].token];
+          mb->token_costs[tx_size][type][ref][0][band][pt]
+                         [tokens[next][1].token];
        }
        UPDATE_RD_COST();
        /* And pick the best. */
@@ -275,22 +275,14 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
          if (t0 != DCT_EOB_TOKEN) {
            pt = trellis_get_coeff_context(scan, nb, i, t0, token_cache,
                                           pad, default_eob);
-          if (!x)
-            rate0 += mb->token_costs[tx_size][type][ref][band][pt][
-                tokens[next][0].token];
-          else
-            rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
-                tokens[next][0].token];
+          rate0 += mb->token_costs[tx_size][type][ref][!x][band][pt]
+                                  [tokens[next][0].token];
          }
          if (t1 != DCT_EOB_TOKEN) {
            pt = trellis_get_coeff_context(scan, nb, i, t1, token_cache,
                                           pad, default_eob);
-          if (!x)
-            rate1 += mb->token_costs[tx_size][type][ref][band][pt][
-                tokens[next][1].token];
-          else
-            rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][
-                tokens[next][1].token];
+          rate1 += mb->token_costs[tx_size][type][ref][!x][band][pt]
+                                  [tokens[next][1].token];
          }
        }
  
@@ -322,12 +314,12 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
        /* Update the cost of each path if we're past the EOB token. */
        if (t0 != DCT_EOB_TOKEN) {
          tokens[next][0].rate +=
-            mb->token_costs[tx_size][type][ref][band][0][t0];
+            mb->token_costs[tx_size][type][ref][1][band][0][t0];
          tokens[next][0].token = ZERO_TOKEN;
        }
        if (t1 != DCT_EOB_TOKEN) {
          tokens[next][1].rate +=
-            mb->token_costs[tx_size][type][ref][band][0][t1];
+            mb->token_costs[tx_size][type][ref][1][band][0][t1];
          tokens[next][1].token = ZERO_TOKEN;
        }
        /* Don't update next, because we didn't add a new node. */
@@ -343,8 +335,8 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCK *mb,
    error1 = tokens[next][1].error;
    t0 = tokens[next][0].token;
    t1 = tokens[next][1].token;
-  rate0 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t0];
-  rate1 += mb->token_costs_noskip[tx_size][type][ref][band][pt][t1];
+  rate0 += mb->token_costs[tx_size][type][ref][0][band][pt][t0];
+  rate1 += mb->token_costs[tx_size][type][ref][0][band][pt][t1];
    UPDATE_RD_COST();
    best = rd_cost1 < rd_cost0;
    final_eob = i0 - 1;
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index 7a2ec56..46d9bce 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -116,8 +116,7 @@ static int rd_thresh_block_size_factor[BLOCK_SIZE_TYPES] =
  #define MAX_RD_THRESH_FREQ_FACT 32
  #define MAX_RD_THRESH_FREQ_INC 1
  
-static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
-                             vp9_coeff_count (*cnoskip)[BLOCK_TYPES],
+static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES][2],
                               vp9_coeff_probs_model (*p)[BLOCK_TYPES]) {
    int i, j, k, l;
    TX_SIZE t;
@@ -128,18 +127,18 @@ static void fill_token_costs(vp9_coeff_count (*c)[BLOCK_TYPES],
            for (l = 0; l < PREV_COEF_CONTEXTS; l++) {
              vp9_prob probs[ENTROPY_NODES];
              vp9_model_to_full_probs(p[t][i][j][k][l], probs);
-            vp9_cost_tokens((int *)cnoskip[t][i][j][k][l], probs,
+            vp9_cost_tokens((int *)c[t][i][j][0][k][l], probs,
                              vp9_coef_tree);
  #if CONFIG_BALANCED_COEFTREE
              // Replace the eob node prob with a very small value so that the
              // cost approximately equals the cost without the eob node
              probs[1] = 1;
-            vp9_cost_tokens((int *)c[t][i][j][k][l], probs, vp9_coef_tree);
+            vp9_cost_tokens((int *)c[t][i][j][1][k][l], probs, vp9_coef_tree);
  #else
-            vp9_cost_tokens_skip((int *)c[t][i][j][k][l], probs,
+            vp9_cost_tokens_skip((int *)c[t][i][j][1][k][l], probs,
                                   vp9_coef_tree);
-            assert(c[t][i][j][k][l][DCT_EOB_TOKEN] ==
-                   cnoskip[t][i][j][k][l][DCT_EOB_TOKEN]);
+            assert(c[t][i][j][0][k][l][DCT_EOB_TOKEN] ==
+                   c[t][i][j][1][k][l][DCT_EOB_TOKEN]);
  #endif
            }
  }
@@ -260,9 +259,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi, int qindex) {
      }
    }
  
-  fill_token_costs(cpi->mb.token_costs,
-                   cpi->mb.token_costs_noskip,
-                   cpi->common.fc.coef_probs);
+  fill_token_costs(cpi->mb.token_costs, cpi->common.fc.coef_probs);
  
    for (i = 0; i < NUM_PARTITION_CONTEXTS; i++)
      vp9_cost_tokens(cpi->mb.partition_cost[i],
@@ -310,18 +307,13 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
    int cost = 0, pad;
    const int *scan, *nb;
    const int eob = xd->plane[plane].eobs[block];
-  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff,
-                                           block, 16);
+  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[plane].qcoeff, block, 16);
    const int ref = mbmi->ref_frame[0] != INTRA_FRAME;
-  unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
-      mb->token_costs[tx_size][type][ref];
+  unsigned int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS]
+                    [MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref];
    ENTROPY_CONTEXT above_ec, left_ec;
    TX_TYPE tx_type = DCT_DCT;
-
    const int segment_id = xd->mode_info_context->mbmi.segment_id;
-  unsigned int (*token_costs_noskip)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
-      mb->token_costs_noskip[tx_size][type][ref];
-
    int seg_eob, default_eob;
    uint8_t token_cache[1024];
    const uint8_t * band_translate;
@@ -390,26 +382,38 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
    if (eob < seg_eob)
      assert(qcoeff_ptr[scan[eob]] == 0);
  
-  {
-    for (c = 0; c < eob; c++) {
-      int v = qcoeff_ptr[scan[c]];
-      int t = vp9_dct_value_tokens_ptr[v].token;
+  if (eob == 0) {
+    // single eob token
+    cost += token_costs[0][0][pt][DCT_EOB_TOKEN];
+  } else {
+    int t, v, prev_rc = 0;
+
+    // dc token
+    v = qcoeff_ptr[0];
+    t = vp9_dct_value_tokens_ptr[v].token;
+    cost += token_costs[0][0][pt][t] + vp9_dct_value_cost_ptr[v];
+    token_cache[0] = vp9_pt_energy_class[t];
+
+    // ac tokens
+    for (c = 1; c < eob; c++) {
+      const int rc = scan[c];
        int band = get_coef_band(band_translate, c);
-      if (c)
-        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
  
-      if (!c || token_cache[scan[c - 1]])  // do not skip eob
-        cost += token_costs_noskip[band][pt][t] + vp9_dct_value_cost_ptr[v];
-      else
-        cost += token_costs[band][pt][t] + vp9_dct_value_cost_ptr[v];
-      token_cache[scan[c]] = vp9_pt_energy_class[t];
+      v = qcoeff_ptr[rc];
+      t = vp9_dct_value_tokens_ptr[v].token;
+      pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+      // as an index at some level
+      cost += token_costs[!token_cache[prev_rc]][band][pt][t] +
+              vp9_dct_value_cost_ptr[v];
+      token_cache[rc] = vp9_pt_energy_class[t];
+      prev_rc = rc;
      }
+
+    // eob token
      if (c < seg_eob) {
-      if (c)
-        pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
-      cost += mb->token_costs_noskip[tx_size][type][ref]
-          [get_coef_band(band_translate, c)]
-          [pt][DCT_EOB_TOKEN];
+      pt = vp9_get_coef_context(scan, nb, pad, token_cache, c, default_eob);
+      cost += token_costs[0][get_coef_band(band_translate, c)][pt]
+                         [DCT_EOB_TOKEN];
      }
    }
author	Ronald S. Bultje <rbultje@google.com>
	Fri, 28 Jun 2013 03:57:37 +0000 (20:57 -0700)
committer	Ronald S. Bultje <rbultje@google.com>
	Fri, 28 Jun 2013 17:29:02 +0000 (10:29 -0700)
vp9/encoder/vp9_block.h		patch \| blob \| history
vp9/encoder/vp9_encodemb.c		patch \| blob \| history
vp9/encoder/vp9_rdopt.c		patch \| blob \| history