Make the quantizer exact.

author Timothy B. Terriberry <tterribe@xiph.org>
Tue, 29 Jun 2010 00:15:09 +0000 (17:15 -0700)

committer Timothy B. Terriberry <tterribe@xiph.org>
Fri, 23 Jul 2010 15:48:01 +0000 (08:48 -0700)
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c

index 4521bfc..bfceab1 100644 (file)
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -63,7 +63,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
      cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
  
      cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;
+    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
  #elif HAVE_ARMV6
      cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
      cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h

index b55bc51..19d307d 100644 (file)
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -33,6 +33,7 @@ typedef struct
  
      // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
      short(*quant)[4];
+    short(*quant_shift)[4];
      short(*zbin)[4];
      short(*zrun_zbin_boost);
      short(*round)[4];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c

index 32cef1d..a05b332 100644 (file)
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -103,6 +103,18 @@ static const int qzbin_factors[129] =
      80,
  };
  
+static void vp8cx_invert_quant(short *quant, short *shift, short d)
+{   /* Derive a 16-bit multiplier (*quant) and a right shift (*shift) from divisor d. */
+    unsigned t;
+    int l;
+    t = d;                          /* d is used as a divisor below, so it must be nonzero */
+    for(l = 0; t > 1; l++)          /* count halvings: l ends as floor(log2(d)) for d >= 1 */
+        t>>=1;
+    t = 1 + (1<<(16+l))/d;          /* scaled reciprocal: 1 + floor(2^(16+l) / d) */
+    *quant = (short)(t - (1<<16));  /* keep t - 2^16; the quantizers add x back after (x * quant) >> 16 */
+    *shift = l;                     /* applied by the quantizers as the final right shift */
+}
+
  void vp8cx_init_quantizer(VP8_COMP *cpi)
  {
      int r, c;
@@ -116,21 +128,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
      {
          // dc values
          quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
          cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
          cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
          cpi->common.Y1dequant[Q][0][0] = quant_val;
          cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
          cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
          cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
          cpi->common.Y2dequant[Q][0][0] = quant_val;
          cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
  
          quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
+                           cpi->UVquant_shift[Q][0] + 0, quant_val);
          cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
          cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
          cpi->common.UVdequant[Q][0][0] = quant_val;
@@ -144,21 +159,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
              c = (rc & 3);
  
              quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
+                               cpi->Y1quant_shift[Q][r] + c, quant_val);
              cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
              cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
              cpi->common.Y1dequant[Q][r][c] = quant_val;
              cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
+                               cpi->Y2quant_shift[Q][r] + c, quant_val);
              cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
              cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
              cpi->common.Y2dequant[Q][r][c] = quant_val;
              cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
  
              quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
+                               cpi->UVquant_shift[Q][r] + c, quant_val);
              cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
              cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
              cpi->common.UVdequant[Q][r][c] = quant_val;
@@ -198,6 +216,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      for (i = 0; i < 16; i++)
      {
          x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
          x->block[i].zbin = cpi->Y1zbin[QIndex];
          x->block[i].round = cpi->Y1round[QIndex];
          x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@@ -211,6 +230,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      for (i = 16; i < 24; i++)
      {
          x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
          x->block[i].zbin = cpi->UVzbin[QIndex];
          x->block[i].round = cpi->UVround[QIndex];
          x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@@ -221,6 +241,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
      // Y2
      zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
      x->block[24].quant = cpi->Y2quant[QIndex];
+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
      x->block[24].zbin = cpi->Y2zbin[QIndex];
      x->block[24].round = cpi->Y2round[QIndex];
      x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c

index dd98a09..54646f4 100644 (file)
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -286,6 +286,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
      for (i = 0; i < 25; i++)
      {
          z->block[i].quant           = x->block[i].quant;
+        z->block[i].quant_shift     = x->block[i].quant_shift;
          z->block[i].zbin            = x->block[i].zbin;
          z->block[i].zrun_zbin_boost   = x->block[i].zrun_zbin_boost;
          z->block[i].round           = x->block[i].round;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h

index fcde220..f76d2ef 100644 (file)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -234,14 +234,17 @@ typedef struct
  {
  
      DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
      DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
      DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
  
      DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
      DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
      DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
  
      DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
      DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
      DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
  
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c

index 181870c..877002b 100644 (file)
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -25,6 +25,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
      short *zbin_ptr   = &b->zbin[0][0];
      short *round_ptr  = &b->round[0][0];
      short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
      short *qcoeff_ptr = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
      short *dequant_ptr = &d->dequant[0][0];
@@ -45,7 +46,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
  
          if (x >= zbin)
          {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
              x  = (y ^ sz) - sz;                         // get the sign back
              qcoeff_ptr[rc] = x;                          // write to destination
              dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
@@ -69,6 +72,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
      short *zbin_ptr   = &b->zbin[0][0];
      short *round_ptr  = &b->round[0][0];
      short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
      short *qcoeff_ptr = d->qcoeff;
      short *dqcoeff_ptr = d->dqcoeff;
      short *dequant_ptr = &d->dequant[0][0];
@@ -95,7 +99,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
  
          if (x >= zbin)
          {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
              x  = (y ^ sz) - sz;                         // get the sign back
              qcoeff_ptr[rc]  = x;                         // write to destination
              dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c

index 11ef419..be226e0 100644 (file)
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -238,7 +238,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
          cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
          cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;
  
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
      }
  
  #endif
@@ -285,8 +285,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
          cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
          /* cpi->rtcd.encodemb.sub* not implemented for wmt */
  
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
-        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
      }
  
  #endif
author	Timothy B. Terriberry <tterribe@xiph.org>
	Tue, 29 Jun 2010 00:15:09 +0000 (17:15 -0700)
committer	Timothy B. Terriberry <tterribe@xiph.org>
	Fri, 23 Jul 2010 15:48:01 +0000 (08:48 -0700)
vp8/encoder/arm/csystemdependent.c		patch \| blob \| history
vp8/encoder/block.h		patch \| blob \| history
vp8/encoder/encodeframe.c		patch \| blob \| history
vp8/encoder/ethreading.c		patch \| blob \| history
vp8/encoder/onyx_int.h		patch \| blob \| history
vp8/encoder/quantize.c		patch \| blob \| history
vp8/encoder/x86/x86_csystemdependent.c		patch \| blob \| history