From c7846ebc3438aa3e1611398567bf950d0656c590 Mon Sep 17 00:00:00 2001
From: Jingning Han <jingning@google.com>
Date: Wed, 1 Aug 2012 10:18:25 -0700
Subject: [PATCH] Use 8x8 DCT transform for I8X8 prediction mode

Apply a 2D DCT of dimension 8x8 to encode the prediction residuals
of I8X8 mode.
Bring back the block type 3 probability context model for 8x8 tokens,
which is used for the coefficients of Y blocks in I8X8 mode. The
coefficient cost estimate for I8X8 mode in the rate-distortion search
is also changed accordingly.
Performance results:
derf:   0.246
yt:     0.114
std-hd: 0.730
hd:     0.670

Change-Id: If1d970eeb4e1827c9f0d2c5b27d33089b347ea27
---
 configure                       |   1 +
 vp8/common/default_coef_probs.h |  53 ++++++++++++++++++++
 vp8/common/entropy.h            |   4 ++
 vp8/decoder/decodframe.c        |  25 ++++++++++
 vp8/decoder/detokenize.c        |  38 +++++++++++++-
 vp8/encoder/encodeintra.c       |  23 ++++++++-
 vp8/encoder/rdopt.c             |  48 ++++++++++++++++++
 vp8/encoder/tokenize.c          | 108 ++++++++++++++++++++++++++++++++++++++--
 8 files changed, 294 insertions(+), 6 deletions(-)

diff --git a/configure b/configure
index 525ccd1..269d997 100755
--- a/configure
+++ b/configure
@@ -226,6 +226,7 @@ EXPERIMENT_LIST="
     lossless
     hybridtransform
     switchable_interp
+    htrans8x8
 "
 CONFIG_LIST="
     external_build
diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h
index d0e114a..145faf1 100644
--- a/vp8/common/default_coef_probs.h
+++ b/vp8/common/default_coef_probs.h
@@ -434,4 +434,57 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8]
       { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128}
     }
   }
+#if CONFIG_HTRANS8X8
+  ,
+  { /* block Type 3 */
+    { /* Coeff Band 0 */
+      { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255},
+      { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255},
+      { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255},
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+    },
+    { /* Coeff Band 1 */
+      { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128},
+      { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128},
+      { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128},
+      { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128}
+    },
+    { /* Coeff Band 2 */
+      { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128},
+      { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128},
+      { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128},
+      { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128}
+    },
+    { /* Coeff Band 3 */
+      { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128},
+      { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128},
+      { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128},
+      { 1, 44, 81, 192, 130, 148, 240, 180, 255, 255, 128}
+    },
+    { /* Coeff Band 4 */
+      { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128},
+      { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128},
+      { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128},
+      { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128}
+    },
+    { /* Coeff Band 5 */
+      { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128},
+      { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128},
+      { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128},
+      { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128}
+    },
+    { /* Coeff Band 6 */
+      { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128},
+      { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128},
+      { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128},
+      { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128}
+    },
+    { /* Coeff Band 7 */
+      { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128},
+      { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128},
+      { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128},
+      { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}
+    }
+  }
+#endif
 };
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 9993741..36cae41 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -63,7 +63,11 @@ extern vp8_extra_bit_struct vp8_extra_bits[12];    /* indexed by token value */
 
 #define BLOCK_TYPES 4
 
+#if CONFIG_HTRANS8X8
+#define BLOCK_TYPES_8X8 4
+#else
 #define BLOCK_TYPES_8X8 3
+#endif
 
 /* Middle dimension is a coarsening of the coefficient's
    position within the 4x4 DCT. */
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index c31595d..47d0faa 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -235,6 +235,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
         xd->mode_info_context->mbmi.txfm_size = TX_8X8;
     }
   }
+
+#if CONFIG_HTRANS8X8
+  if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+    xd->mode_info_context->mbmi.txfm_size = TX_8X8;
+  }
+#endif
+
   tx_type = xd->mode_info_context->mbmi.txfm_size;
 
   if (xd->mode_info_context->mbmi.mb_skip_coeff) {
@@ -356,11 +363,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
       int i8x8mode;
       BLOCKD *b;
 
+#if CONFIG_HTRANS8X8
+      int idx = (ib & 0x02) ? (ib + 2) : ib;
+
+      short *q  = xd->block[idx].qcoeff;
+      short *dq = xd->block[0].dequant;
+      unsigned char *pre = xd->block[ib].predictor;
+      unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
+      int stride = xd->dst.y_stride;
+
+      tx_type = TX_4X4;
+      xd->mode_info_context->mbmi.txfm_size = TX_4X4;
+#endif
+
       b = &xd->block[ib];
       i8x8mode = b->bmi.as_mode.first;
       RECON_INVOKE(RTCD_VTABLE(recon), intra8x8_predict)
       (b, i8x8mode, b->predictor);
 
+#if CONFIG_HTRANS8X8
+      vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
+      q += 64;
+#else
       for (j = 0; j < 4; j++) {
         b = &xd->block[ib + iblock[j]];
         if (xd->eobs[ib + iblock[j]] > 1) {
@@ -374,6 +398,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd,
           ((int *)b->qcoeff)[0] = 0;
         }
       }
+#endif
 
       b = &xd->block[16 + i];
       RECON_INVOKE(RTCD_VTABLE(recon), intra_uv4x4_predict)
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 1acde4c..155877a 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -124,7 +124,8 @@ void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr,
 
   int QIndex = xd->q_index;
   int active_ht = (QIndex < ACTIVE_HT) &&
-                  (xd->mode_info_context->mbmi.mode == B_PRED);
+                  (xd->mode_info_context->mbmi.mode == B_PRED) &&
+                  (type == PLANE_TYPE_Y_WITH_DC);
 
   if(active_ht) {
     switch(xd->block[block].bmi.as_mode.tx_type) {
@@ -351,8 +352,16 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
   const int segment_id = xd->mode_info_context->mbmi.segment_id;
   const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB);
   INT16 *qcoeff_ptr = &xd->qcoeff[0];
+
+#if CONFIG_HTRANS8X8
+  int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24;
+  if (xd->mode_info_context->mbmi.mode != B_PRED &&
+      xd->mode_info_context->mbmi.mode != SPLITMV &&
+      xd->mode_info_context->mbmi.mode != I8X8_PRED) {
+#else
   if (xd->mode_info_context->mbmi.mode != B_PRED &&
       xd->mode_info_context->mbmi.mode != SPLITMV) {
+#endif
     ENTROPY_CONTEXT *const a = A + vp8_block2above_8x8[24];
     ENTROPY_CONTEXT *const l = L + vp8_block2left_8x8[24];
     const int *const scan = vp8_default_zig_zag1d;
@@ -376,10 +385,16 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
     seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
   else
     seg_eob = 64;
+
+#if CONFIG_HTRANS8X8
+  for (i = 0; i < bufthred ; i += 4) {
+#else
   for (i = 0; i < 24; i += 4) {
+#endif
     ENTROPY_CONTEXT *const a = A + vp8_block2above_8x8[i];
     ENTROPY_CONTEXT *const l = L + vp8_block2left_8x8[i];
     const int *const scan = vp8_default_zig_zag1d_8x8;
+
     if (i == 16)
       type = PLANE_TYPE_UV;
 
@@ -393,6 +408,27 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) {
     qcoeff_ptr += 64;
   }
 
+#if CONFIG_HTRANS8X8
+  if (xd->mode_info_context->mbmi.mode == I8X8_PRED) {
+    type = PLANE_TYPE_UV;
+    seg_eob = 16;
+
+    // use 4x4 transform for U, V components in I8X8 prediction mode
+    for (i = 16; i < 24; i++) {
+      ENTROPY_CONTEXT *const a = A + vp8_block2above[i];
+      ENTROPY_CONTEXT *const l = L + vp8_block2left[i];
+      const int *scan = vp8_default_zig_zag1d;
+
+      c = vp8_decode_coefs(pbi, xd, a, l, type, seg_eob, qcoeff_ptr,
+                           i, scan, TX_4X4, coef_bands_x);
+      a[0] = l[0] = ((eobs[i] = c) != !type);
+
+      eobtotal += c;
+      qcoeff_ptr += 16;
+    }
+  }
+#endif
+
   return eobtotal;
 }
 
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 46b352e..01ae03a 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -306,6 +306,25 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
   }
 #endif
 
+#if CONFIG_HTRANS8X8
+  {
+    MACROBLOCKD *xd = &x->e_mbd;
+    int idx = (ib & 0x02) ? (ib + 2) : ib;
+
+    // generate residual blocks
+    vp8_subtract_4b_c(be, b, 16);
+    x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+    x->quantize_b_8x8(x->block + idx, xd->block + idx);
+    vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+
+    // reconstruct submacroblock
+    for (i = 0; i < 4; i++) {
+      b = &xd->block[ib + iblock[i]];
+      vp8_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
+                    b->dst_stride);
+    }
+  }
+#else
   for (i = 0; i < 4; i++) {
     b = &x->e_mbd.block[ib + iblock[i]];
     be = &x->block[ib + iblock[i]];
@@ -314,8 +333,10 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd,
     x->quantize_b(be, b);
     vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
     RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor,
-                                              b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+                                              b->diff, *(b->base_dst) + b->dst,
+                                              b->dst_stride);
   }
+#endif
 }
 
 extern const int vp8_i8x8_block[4];
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 5fd92a5..ad9b4ce 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -447,6 +447,20 @@ int vp8_block_error_c(short *coeff, short *dqcoeff) {
   return error;
 }
 
+#if CONFIG_HTRANS8X8
+int vp8_submb_error_c(short *coeff, short *dqcoeff) {
+  int i;
+  int error = 0;
+  /* Sum of squared differences over the 64 coefficients of an 8x8 block. */
+  for (i = 0; i < 64; i++) {
+    int this_diff = coeff[i] - dqcoeff[i];
+    error += this_diff * this_diff;
+  }
+
+  return error;
+}
+#endif
+
 int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) {
   BLOCK  *be;
   BLOCKD *bd;
@@ -1175,6 +1189,12 @@ static int rd_pick_intra8x8block(
   DECLARE_ALIGNED_ARRAY(16, unsigned char,  best_predictor, 16 * 8);
   DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4);
 
+#if CONFIG_HTRANS8X8
+  // perform transformation of dimension 8x8
+  // note the input and output index mapping
+  int idx = (ib & 0x02) ? (ib + 2) : ib;
+#endif
+
   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
 #if CONFIG_COMP_INTRA_PRED
     for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) {
@@ -1200,6 +1220,24 @@ static int rd_pick_intra8x8block(
 
       vp8_subtract_4b_c(be, b, 16);
 
+#if CONFIG_HTRANS8X8
+      x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+      x->quantize_b_8x8(x->block + idx, xd->block + idx);
+
+      // compute quantization mse of 8x8 block
+      distortion = vp8_submb_error_c((x->block + idx)->coeff,
+                                     (xd->block + idx)->dqcoeff)>>2;
+
+      ta0 = *(a + vp8_block2above_8x8[idx]);
+      tl0 = *(l + vp8_block2left_8x8 [idx]);
+
+      rate_t = cost_coeffs_8x8(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC,
+                               &ta0,
+                               &tl0);
+      rate += rate_t;
+      ta1 = ta0;
+      tl1 = tl0;
+#else
       x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32);
       x->vp8_short_fdct8x4(be->src_diff + 64, be->coeff + 64, 32);
 
@@ -1230,6 +1268,8 @@ static int rd_pick_intra8x8block(
       rate_t += cost_coeffs(x, xd->block + ib + 5, PLANE_TYPE_Y_WITH_DC,
                             &ta1, &tl1);
       rate += rate_t;
+#endif
+
       this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
       if (this_rd < best_rd) {
         *bestrate = rate;
@@ -1257,10 +1297,18 @@ static int rd_pick_intra8x8block(
   b->bmi.as_mode.second = (*best_second_mode);
 #endif
   vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib);
+
+#if CONFIG_HTRANS8X8
+  *(a + vp8_block2above_8x8[idx])     = besta0;
+  *(a + vp8_block2above_8x8[idx] + 1) = besta1;
+  *(l + vp8_block2left_8x8 [idx])     = bestl0;
+  *(l + vp8_block2left_8x8 [idx] + 1) = bestl1;
+#else
   *(a + vp8_block2above[ib])   = besta0;
   *(a + vp8_block2above[ib + 1]) = besta1;
   *(l + vp8_block2above[ib])   = bestl0;
   *(l + vp8_block2above[ib + 4]) = bestl1;
+#endif
   return best_rd;
 }
 
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 50a3164..81ba6f2 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -334,9 +334,8 @@ static void tokenize1st_order_ht(   MACROBLOCKD *xd,
     }
 
     // assign scanning order for luma components coded in intra4x4 mode
-    if( ( ( xd->mode_info_context->mbmi.mode == B_PRED ) ||
-          ( xd->mode_info_context->mbmi.mode == I8X8_PRED ) ) &&
-        ( type == PLANE_TYPE_Y_WITH_DC) ) {
+    if( (xd->mode_info_context->mbmi.mode == B_PRED) &&
+        (type == PLANE_TYPE_Y_WITH_DC) ) {
       switch(b_mode) {
         case B_VE_PRED :
         case B_VR_PRED :
@@ -455,6 +454,84 @@ static void tokenize1st_order_ht(   MACROBLOCKD *xd,
 }
 #endif
 
+
+#if CONFIG_HTRANS8X8
+static void tokenize1st_order_chroma
+(
+  MACROBLOCKD *xd,
+  TOKENEXTRA **tp,
+  int type,           /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
+  VP8_COMP *cpi
+) {
+  unsigned int block;
+  const BLOCKD *b;
+  int pt;             /* near block/prev token context index */
+  int c;
+  int token;
+  TOKENEXTRA *t = *tp;/* store tokens starting here */
+  const short *qcoeff_ptr;
+  ENTROPY_CONTEXT *a;
+  ENTROPY_CONTEXT *l;
+  int band, rc, v;
+  int tmp1, tmp2;
+  /* Emits tokens for blocks 16..23 only; 'type' is never read below. */
+  int seg_eob = 16;
+  int segment_id = xd->mode_info_context->mbmi.segment_id;
+  /* An active segment-level EOB feature overrides the default of 16. */
+  if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) {
+    seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB);
+  }
+
+  b = xd->block;
+  b += 16;
+
+  /* Chroma: blocks 16..23, read in vp8_default_zig_zag1d order */
+  for (block = 16; block < 24; block++, b++) {
+    tmp1 = vp8_block2above[block];
+    tmp2 = vp8_block2left[block];
+    qcoeff_ptr = b->qcoeff;
+    a = (ENTROPY_CONTEXT *)xd->above_context + tmp1;
+    l = (ENTROPY_CONTEXT *)xd->left_context + tmp2;
+
+    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
+
+    for (c = 0; c < b->eob; c++) {
+      rc = vp8_default_zig_zag1d[c];
+      band = vp8_coef_bands[c];
+      v = qcoeff_ptr[rc];
+      /* look up the token and extra bits for this coefficient value */
+      t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
+      token    = vp8_dct_value_tokens_ptr[v].Token;
+
+      t->Token = token;
+      t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+
+      t->skip_eob_node = ((pt == 0) && (band > 0));
+      /* plane index 2 is hard-coded for both probabilities and counts */
+      ++cpi->coef_counts       [2] [band] [pt] [token];
+
+      pt = vp8_prev_token_class[token];
+      t++;
+    }
+    /* append an end-of-block token when fewer than seg_eob were coded */
+    if (c < seg_eob) {
+      band = vp8_coef_bands[c];
+      t->Token = DCT_EOB_TOKEN;
+      t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt];
+
+      t->skip_eob_node = ((pt == 0) && (band > 0));
+
+      ++cpi->coef_counts       [2] [band] [pt] [DCT_EOB_TOKEN];
+
+      t++;
+    }
+    *tp = t;       /* publish the advanced token write position */
+    pt = (c != 0); /* 0 <-> all coeff data is zero */
+    *a = *l = pt;  /* same flag goes to the above and left contexts */
+  }
+}
+#endif
+
 static void tokenize1st_order_b
 (
   MACROBLOCKD *xd,
@@ -640,7 +717,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) {
 
 #if CONFIG_HYBRIDTRANSFORM
     int QIndex = cpi->mb.q_index;
-    int active_ht = (QIndex < ACTIVE_HT);
+    int active_ht = (QIndex < ACTIVE_HT) &&
+                    (x->mode_info_context->mbmi.mode == B_PRED);
 #endif
 
   if (!segfeature_active(x, segment_id, SEG_LVL_EOB) ||
@@ -717,7 +795,29 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) {
     if(active_ht) {
       tokenize1st_order_ht(x, t, plane_type, cpi);
     } else {
+
+#if CONFIG_HTRANS8X8
+      if (x->mode_info_context->mbmi.mode == I8X8_PRED) {
+        ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
+        ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
+        for (b = 0; b < 16; b += 4) {
+          tokenize1st_order_b_8x8(x,
+                                  x->block + b, t, PLANE_TYPE_Y_WITH_DC,
+                                  x->frame_type,
+                                  A + vp8_block2above_8x8[b],
+                                  L + vp8_block2left_8x8[b],
+                                  cpi);
+          *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]);
+          *(L + vp8_block2left_8x8[b] + 1)  = *(L + vp8_block2left_8x8[b]);
+        }
+        tokenize1st_order_chroma(x, t, PLANE_TYPE_UV, cpi);
+      } else {
+        tokenize1st_order_b(x, t, plane_type, cpi);
+      }
+#else
       tokenize1st_order_b(x, t, plane_type, cpi);
+#endif
+
     }
 #else
     tokenize1st_order_b(x, t, plane_type, cpi);
-- 
2.7.4