optimize 8x8 fdct rounding for accuracy

author Yaowu Xu <yaowu@google.com>
Fri, 22 Feb 2013 19:14:04 +0000 (11:14 -0800)

committer Yaowu Xu <yaowu@google.com>
Sat, 23 Feb 2013 00:55:30 +0000 (16:55 -0800)

diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc

index d82f7c3..1a3e240 100644 (file)
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -141,7 +141,7 @@ TEST(VP9Fdct8x8Test, ExtremalCheck) {
  
      // Initialize a test block with input range {-255, 255}.
      for (int j = 0; j < 64; ++j)
-      test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+      test_input_block[j] = rnd.Rand8() % 2 ? 255 : -256;
  
      const int pitch = 16;
      vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
diff --git a/test/test.mk b/test/test.mk

index 557eafd..2965762 100644 (file)
--- a/test/test.mk
+++ b/test/test.mk
@@ -72,7 +72,7 @@ endif
  
  LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += convolve_test.cc
  LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
-#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
  #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
  LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
  #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c

index 4a1e78e..a459e94 100644 (file)
--- a/vp9/encoder/vp9_dct.c
+++ b/vp9/encoder/vp9_dct.c
@@ -323,247 +323,6 @@ static const int16_t adst_i16[256] = {
  };
  #endif
  
-#define NEW_FDCT8x8 1
-#if !NEW_FDCT8x8
-static const int xC1S7 = 16069;
-static const int xC2S6 = 15137;
-static const int xC3S5 = 13623;
-static const int xC4S4 = 11585;
-static const int xC5S3 =  9102;
-static const int xC6S2 =  6270;
-static const int xC7S1 =  3196;
-
-#define SHIFT_BITS 14
-#define DOROUND(X) X += (1<<(SHIFT_BITS-1));
-
-#define FINAL_SHIFT 3
-#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1))
-#define IN_SHIFT (FINAL_SHIFT+1)
-
-
-void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) {
-  int loop;
-  int short_pitch = pitch >> 1;
-  int is07, is12, is34, is56;
-  int is0734, is1256;
-  int id07, id12, id34, id56;
-  int irot_input_x, irot_input_y;
-  int icommon_product1;      // Re-used product  (c4s4 * (s12 - s56))
-  int icommon_product2;      // Re-used product  (c4s4 * (d12 + d56))
-  int temp1, temp2;          // intermediate variable for computation
-
-  int  InterData[64];
-  int  *ip = InterData;
-  short *op = OutputData;
-
-  for (loop = 0; loop < 8; loop++) {
-    // Pre calculate some common sums and differences.
-    is07 = (InputData[0] + InputData[7]) << IN_SHIFT;
-    is12 = (InputData[1] + InputData[2]) << IN_SHIFT;
-    is34 = (InputData[3] + InputData[4]) << IN_SHIFT;
-    is56 = (InputData[5] + InputData[6]) << IN_SHIFT;
-    id07 = (InputData[0] - InputData[7]) << IN_SHIFT;
-    id12 = (InputData[1] - InputData[2]) << IN_SHIFT;
-    id34 = (InputData[3] - InputData[4]) << IN_SHIFT;
-    id56 = (InputData[5] - InputData[6]) << IN_SHIFT;
-
-    is0734 = is07 + is34;
-    is1256 = is12 + is56;
-
-    // Pre-Calculate some common product terms.
-    icommon_product1 = xC4S4 * (is12 - is56);
-    DOROUND(icommon_product1)
-    icommon_product1 >>= SHIFT_BITS;
-
-    icommon_product2 = xC4S4 * (id12 + id56);
-    DOROUND(icommon_product2)
-    icommon_product2 >>= SHIFT_BITS;
-
-
-    ip[0] = (xC4S4 * (is0734 + is1256));
-    DOROUND(ip[0]);
-    ip[0] >>= SHIFT_BITS;
-
-    ip[4] = (xC4S4 * (is0734 - is1256));
-    DOROUND(ip[4]);
-    ip[4] >>= SHIFT_BITS;
-
-    // Define inputs to rotation for outputs 2 and 6
-    irot_input_x = id12 - id56;
-    irot_input_y = is07 - is34;
-
-    // Apply rotation for outputs 2 and 6.
-    temp1 = xC6S2 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[2] = temp1 + temp2;
-
-    temp1 = xC6S2 * irot_input_y;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_x;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[6] = temp1 - temp2;
-
-    // Define inputs to rotation for outputs 1 and 7
-    irot_input_x = icommon_product1 + id07;
-    irot_input_y = -(id34 + icommon_product2);
-
-    // Apply rotation for outputs 1 and 7.
-    temp1 = xC1S7 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC7S1 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[1] = temp1 - temp2;
-
-    temp1 = xC7S1 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC1S7 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[7] = temp1 + temp2;
-
-    // Define inputs to rotation for outputs 3 and 5
-    irot_input_x = id07 - icommon_product1;
-    irot_input_y = id34 - icommon_product2;
-
-    // Apply rotation for outputs 3 and 5.
-    temp1 = xC3S5 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC5S3 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[3] = temp1 - temp2;
-
-
-    temp1 = xC5S3 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC3S5 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    ip[5] = temp1 + temp2;
-
-    // Increment data pointer for next row
-    InputData += short_pitch;
-    ip += 8;
-  }
-
-  // Performed DCT on rows, now transform the columns
-  ip = InterData;
-  for (loop = 0; loop < 8; loop++) {
-    // Pre calculate some common sums and differences.
-    is07 = ip[0 * 8] + ip[7 * 8];
-    is12 = ip[1 * 8] + ip[2 * 8];
-    is34 = ip[3 * 8] + ip[4 * 8];
-    is56 = ip[5 * 8] + ip[6 * 8];
-
-    id07 = ip[0 * 8] - ip[7 * 8];
-    id12 = ip[1 * 8] - ip[2 * 8];
-    id34 = ip[3 * 8] - ip[4 * 8];
-    id56 = ip[5 * 8] - ip[6 * 8];
-
-    is0734 = is07 + is34;
-    is1256 = is12 + is56;
-
-    // Pre-Calculate some common product terms
-    icommon_product1 = xC4S4 * (is12 - is56);
-    icommon_product2 = xC4S4 * (id12 + id56);
-    DOROUND(icommon_product1)
-    DOROUND(icommon_product2)
-    icommon_product1 >>= SHIFT_BITS;
-    icommon_product2 >>= SHIFT_BITS;
-
-
-    temp1 = xC4S4 * (is0734 + is1256);
-    temp2 = xC4S4 * (is0734 - is1256);
-    DOROUND(temp1);
-    DOROUND(temp2);
-    temp1 >>= SHIFT_BITS;
-
-    temp2 >>= SHIFT_BITS;
-    op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT;
-    op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Define inputs to rotation for outputs 2 and 6
-    irot_input_x = id12 - id56;
-    irot_input_y = is07 - is34;
-
-    // Apply rotation for outputs 2 and 6.
-    temp1 = xC6S2 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    temp1 = xC6S2 * irot_input_y;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC2S6 * irot_input_x;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Define inputs to rotation for outputs 1 and 7
-    irot_input_x = icommon_product1 + id07;
-    irot_input_y = -(id34 + icommon_product2);
-
-    // Apply rotation for outputs 1 and 7.
-    temp1 = xC1S7 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC7S1 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    temp1 = xC7S1 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC1S7 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Define inputs to rotation for outputs 3 and 5
-    irot_input_x = id07 - icommon_product1;
-    irot_input_y = id34 - icommon_product2;
-
-    // Apply rotation for outputs 3 and 5.
-    temp1 = xC3S5 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC5S3 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-
-    temp1 = xC5S3 * irot_input_x;
-    DOROUND(temp1);
-    temp1 >>= SHIFT_BITS;
-    temp2 = xC3S5 * irot_input_y;
-    DOROUND(temp2);
-    temp2 >>= SHIFT_BITS;
-    op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT;
-
-    // Increment data pointer for next column.
-    ip++;
-    op++;
-  }
-}
-#endif
-
  /* For test */
  #define TEST_INT 1
  #if TEST_INT
@@ -918,7 +677,6 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch)
      vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
  }
  
-#if NEW_FDCT8x8
  static void fdct8_1d(int16_t *input, int16_t *output) {
    int16_t step[8];
    int temp1, temp2;
@@ -986,10 +744,9 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) {
        temp_in[j] = out[j + i * 8];
      fdct8_1d(temp_in, temp_out);
      for (j = 0; j < 8; ++j)
-      output[j + i * 8] = temp_out[j] >> 1;
+      output[j + i * 8] = temp_out[j] / 2;
    }
  }
-#endif
  
  #if CONFIG_INTHT
  static void fadst8_1d(int16_t *input, int16_t *output) {
author	Yaowu Xu <yaowu@google.com>
	Fri, 22 Feb 2013 19:14:04 +0000 (11:14 -0800)
committer	Yaowu Xu <yaowu@google.com>
	Sat, 23 Feb 2013 00:55:30 +0000 (16:55 -0800)
test/fdct8x8_test.cc		patch | blob | history
test/test.mk		patch | blob | history
vp9/encoder/vp9_dct.c		patch | blob | history