Improved vp9_ihtllm_c

author Scott LaVarnway <slavarnway@google.com>

Wed, 12 Dec 2012 23:49:39 +0000 (15:49 -0800)

committer Scott LaVarnway <slavarnway@google.com>

Wed, 12 Dec 2012 23:49:39 +0000 (15:49 -0800)
author Scott LaVarnway <slavarnway@google.com>
Wed, 12 Dec 2012 23:49:39 +0000 (15:49 -0800)
committer Scott LaVarnway <slavarnway@google.com>
Wed, 12 Dec 2012 23:49:39 +0000 (15:49 -0800)
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c

index 9622dfd..897514e 100644 (file)
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -404,8 +404,9 @@ void vp9_ihtllm_float_c(const int16_t *input, int16_t *output, int pitch,
  #define HORIZONTAL_SHIFT 17  // 15
  #define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
  void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
-                      TX_TYPE tx_type, int tx_dim) {
+                      TX_TYPE tx_type, int tx_dim, uint16_t eobs) {
    int i, j, k;
+  int nz_dim;
    int16_t imbuf[256];
  
    const int16_t *ip = input;
@@ -444,12 +445,25 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
        break;
    }
  
+  nz_dim = tx_dim;
+  if(tx_dim > 4) {
+    if(eobs < 36) {
+      vpx_memset(im, 0, 512);
+      nz_dim = 8;
+      if(eobs < 3) {
+        nz_dim = 2;
+      } else if(eobs < 10) {
+        nz_dim = 4;
+      }
+    }
+  }
+
    /* vertical transformation */
    for (j = 0; j < tx_dim; j++) {
-    for (i = 0; i < tx_dim; i++) {
+    for (i = 0; i < nz_dim; i++) {
        int temp = 0;
  
-      for (k = 0; k < tx_dim; k++) {
+      for (k = 0; k < nz_dim; k++) {
          temp += ptv[k] * ip[(k * tx_dim)];
        }
  
@@ -470,7 +484,7 @@ void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
      for (i = 0; i < tx_dim; i++) {
        int temp = 0;
  
-      for (k = 0; k < tx_dim; k++) {
+      for (k = 0; k < nz_dim; k++) {
          temp += im[k] * pthc[k];
        }
  
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c

index c78f1ad..eff9198 100644 (file)
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -52,7 +52,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
      TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
      if (tx_type != DCT_DCT) {
        vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
-                   tx_type, 4);
+                   tx_type, 4, xd->block[i].eob);
      } else {
        vp9_inverse_transform_b_4x4(xd, i, 32);
      }
@@ -91,7 +91,8 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
    for (i = 0; i < 9; i += 8) {
      TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
      if (tx_type != DCT_DCT) {
-      vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8);
+      vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+                 xd->block[i].eob);
      } else {
        vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
                                    &blockd[i].diff[0], 32);
@@ -100,7 +101,8 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
    for (i = 2; i < 11; i += 8) {
      TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
      if (tx_type != DCT_DCT) {
-      vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8);
+      vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+                 xd->block[i + 2].eob);
      } else {
        vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
                                    &blockd[i].diff[0], 32);
@@ -132,7 +134,7 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
    BLOCKD *bd = &xd->block[0];
    TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
    if (tx_type != DCT_DCT) {
-    vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16);
+    vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob);
    } else {
      vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
                                    &xd->block[0].diff[0], 32);
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh

index 5b7af10..e8981ce 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -361,7 +361,7 @@ specialize vp9_short_idct16x16
  prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
  specialize vp9_short_idct10_16x16
  
-prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim"
+prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim, short eobs"
  specialize vp9_ihtllm
  
  #
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c

index f95a83a..b18ef8b 100644 (file)
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -248,7 +248,8 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
    if (tx_type != DCT_DCT) {
      vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
                                      xd->block[0].dequant, xd->predictor,
-                                    xd->dst.y_buffer, 16, xd->dst.y_stride);
+                                    xd->dst.y_buffer, 16, xd->dst.y_stride,
+                                    xd->eobs[0]);
    } else {
      vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
                                 xd->predictor, xd->dst.y_buffer,
@@ -294,7 +295,8 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
        }
        tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
        if (tx_type != DCT_DCT) {
-        vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride);
+        vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
+                                      xd->eobs[idx]);
        } else {
          vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
                                     0, xd->eobs[idx]);
@@ -393,7 +395,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
            vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
                                      b->dequant, b->predictor,
                                      *(b->base_dst) + b->dst, 16,
-                                    b->dst_stride);
+                                    b->dst_stride, b->eob);
          } else {
            vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
                                 *(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -438,7 +440,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
        if (tx_type != DCT_DCT) {
          vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
                                    b->dequant, b->predictor,
-                                  *(b->base_dst) + b->dst, 16, b->dst_stride);
+                                  *(b->base_dst) + b->dst, 16, b->dst_stride,
+                                  b->eob);
        } else {
          vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
                               *(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -500,7 +503,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
            vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
                                      b->dequant, b->predictor,
                                      *(b->base_dst) + b->dst, 16,
-                                    b->dst_stride);
+                                    b->dst_stride, b->eob);
          } else {
            vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
                                 *(b->base_dst) + b->dst, 16, b->dst_stride);
@@ -553,7 +556,7 @@ static void decode_16x16_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
          tx_type, xd->qcoeff, xd->block[0].dequant,
          xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
          xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
-        xd->dst.y_stride, xd->dst.y_stride);
+        xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);
    } else {
      vp9_dequant_idct_add_16x16(
          xd->qcoeff, xd->block[0].dequant,
@@ -591,7 +594,7 @@ static void decode_8x8_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
              + x_idx * 16 + (i & 1) * 8,
              xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
              + x_idx * 16 + (i & 1) * 8,
-            stride, stride);
+            stride, stride, b->eob);
        } else {
          vp9_dequant_idct_add_8x8_c(
              q, dq,
@@ -647,7 +650,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
              + x_idx * 16 + (i & 3) * 4,
              xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
              + x_idx * 16 + (i & 3) * 4,
-            xd->dst.y_stride, xd->dst.y_stride);
+            xd->dst.y_stride, xd->dst.y_stride, b->eob);
        } else {
          vp9_dequant_idct_add_c(
              b->qcoeff, b->dequant,
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c

index 79114d5..39a2de1 100644 (file)
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -13,7 +13,6 @@
  #include "vp9/decoder/vp9_dequantize.h"
  #include "vpx_mem/vpx_mem.h"
  #include "vp9/decoder/vp9_onyxd_int.h"
-
  static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
                           uint8_t *dest, int stride, int width, int height) {
    int r, c;
@@ -74,7 +73,7 @@ void vp9_dequantize_b_c(BLOCKD *d) {
  void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
                                 const int16_t *dq,
                                 uint8_t *pred, uint8_t *dest,
-                               int pitch, int stride) {
+                               int pitch, int stride, uint16_t eobs) {
    int16_t output[16];
    int16_t *diff_ptr = output;
    int i;
@@ -83,7 +82,7 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
      input[i] = dq[i] * input[i];
    }
  
-  vp9_ihtllm(input, output, 4 << 1, tx_type, 4);
+  vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs);
  
    vpx_memset(input, 0, 32);
  
@@ -93,21 +92,25 @@ void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
  void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
                                     const int16_t *dq,
                                     uint8_t *pred, uint8_t *dest,
-                                   int pitch, int stride) {
+                                   int pitch, int stride, uint16_t eobs) {
    int16_t output[64];
    int16_t *diff_ptr = output;
    int i;
+  if (eobs == 0) {
+    /* All 0 DCT coefficient */
+    vp9_copy_mem8x8(pred, pitch, dest, stride);
+  } else if (eobs > 0) {
+    input[0] = dq[0] * input[0];
+    for (i = 1; i < 64; i++) {
+      input[i] = dq[1] * input[i];
+    }
  
-  input[0] = dq[0] * input[0];
-  for (i = 1; i < 64; i++) {
-    input[i] = dq[1] * input[i];
-  }
-
-  vp9_ihtllm(input, output, 16, tx_type, 8);
+    vp9_ihtllm(input, output, 16, tx_type, 8, eobs);
  
-  vpx_memset(input, 0, 128);
+    vpx_memset(input, 0, 128);
  
-  add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+    add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+  }
  }
  
  void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
@@ -269,26 +272,31 @@ void vp9_dequant_idct_add_8x8_c(int16_t *input, const int16_t *dq,
  
  void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
                                       const int16_t *dq, uint8_t *pred,
-                                     uint8_t *dest, int pitch, int stride) {
+                                     uint8_t *dest, int pitch, int stride,
+                                     uint16_t eobs) {
    int16_t output[256];
    int16_t *diff_ptr = output;
    int i;
+  if (eobs == 0) {
+    /* All 0 DCT coefficient */
+    vp9_copy_mem16x16(pred, pitch, dest, stride);
+  } else if (eobs > 0) {
+    input[0]= input[0] * dq[0];
  
-  input[0]= input[0] * dq[0];
-
-  // recover quantizer for 4 4x4 blocks
-  for (i = 1; i < 256; i++)
-    input[i] = input[i] * dq[1];
+    // recover quantizer for 4 4x4 blocks
+    for (i = 1; i < 256; i++)
+      input[i] = input[i] * dq[1];
  
-  // inverse hybrid transform
-  vp9_ihtllm(input, output, 32, tx_type, 16);
+    // inverse hybrid transform
+    vp9_ihtllm(input, output, 32, tx_type, 16, eobs);
  
-  // the idct halves ( >> 1) the pitch
-  // vp9_short_idct16x16_c(input, output, 32);
+    // the idct halves ( >> 1) the pitch
+    // vp9_short_idct16x16_c(input, output, 32);
  
-  vpx_memset(input, 0, 512);
+    vpx_memset(input, 0, 512);
  
-  add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+    add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+  }
  }
  
  void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h

index 8a6bf2b..f348b21 100644 (file)
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -58,16 +58,17 @@ typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(short *q, const short *dq,
  
  void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq,
                                      unsigned char *pred, unsigned char *dest,
-                                    int pitch, int stride);
+                                    int pitch, int stride, uint16_t eobs);
  
  void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input,
                                     const short *dq, unsigned char *pred,
-                                   unsigned char *dest, int pitch, int stride);
+                                   unsigned char *dest, int pitch, int stride,
+                                   uint16_t eobs);
  
  void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input,
                                       const short *dq, unsigned char *pred,
                                       unsigned char *dest,
-                                     int pitch, int stride);
+                                     int pitch, int stride, uint16_t eobs);
  
  #if CONFIG_SUPERBLOCKS
  void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq,
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c

index 4ee21bb..810f1c4 100644 (file)
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -70,7 +70,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
    if (tx_type != DCT_DCT) {
      vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
      vp9_ht_quantize_b_4x4(be, b, tx_type);
-    vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4);
+    vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
    } else {
      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
      x->quantize_b_4x4(be, b) ;
@@ -191,7 +191,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
                  tx_type, 8);
        x->quantize_b_8x8(x->block + idx, xd->block + idx);
        vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
-                   tx_type, 8);
+                   tx_type, 8, xd->block[idx].eob);
      } else {
        x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
        x->quantize_b_8x8(x->block + idx, xd->block + idx);
@@ -205,7 +205,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
        if (tx_type != DCT_DCT) {
          vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
          vp9_ht_quantize_b_4x4(be, b, tx_type);
-        vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4);
+        vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
        } else {
          x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4(be, b);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index 9cea189..4559e44 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1120,7 +1120,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
  
    // inverse transform
    if (best_tx_type != DCT_DCT)
-    vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4);
+    vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
    else
      xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
author	Scott LaVarnway <slavarnway@google.com>
	Wed, 12 Dec 2012 23:49:39 +0000 (15:49 -0800)
committer	Scott LaVarnway <slavarnway@google.com>
	Wed, 12 Dec 2012 23:49:39 +0000 (15:49 -0800)
vp9/common/vp9_idctllm.c		patch | blob | history
vp9/common/vp9_invtrans.c		patch | blob | history
vp9/common/vp9_rtcd_defs.sh		patch | blob | history
vp9/decoder/vp9_decodframe.c		patch | blob | history
vp9/decoder/vp9_dequantize.c		patch | blob | history
vp9/decoder/vp9_dequantize.h		patch | blob | history
vp9/encoder/vp9_encodeintra.c		patch | blob | history
vp9/encoder/vp9_rdopt.c		patch | blob | history