Make dequant/idct block size independent

author Jingning Han <jingning@google.com>

Wed, 10 Apr 2013 19:30:20 +0000 (12:30 -0700)

committer Jingning Han <jingning@google.com>

Wed, 10 Apr 2013 22:54:43 +0000 (15:54 -0700)
author Jingning Han <jingning@google.com>
Wed, 10 Apr 2013 19:30:20 +0000 (12:30 -0700)
committer Jingning Han <jingning@google.com>
Wed, 10 Apr 2013 22:54:43 +0000 (15:54 -0700)
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c

index 5aecbe9..b311f32 100644 (file)
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -443,18 +443,61 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
    }
  }
  
-static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
-  const int y_count = y_size * y_size;
-  const int uv_size = y_size / 2;
-  const int uv_count = uv_size * uv_size;
+static INLINE void decode_sby_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) - 1, bw = 1 << bwl;
+  const int bhl = mb_height_log2(bsize) - 1, bh = 1 << bhl;
+  const int y_count = bw * bh;
    int n;
  
    for (n = 0; n < y_count; n++) {
-    const int x_idx = n % y_size;
-    const int y_idx = n / y_size;
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> bwl;
+    const int y_offset = (y_idx * 32) * mb->dst.y_stride + (x_idx * 32);
+    vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 1024),
+                               mb->block[0].dequant ,
+                               mb->dst.y_buffer + y_offset,
+                               mb->dst.y_buffer + y_offset,
+                               mb->dst.y_stride, mb->dst.y_stride,
+                               mb->plane[0].eobs[n * 64]);
+  }
+}
+
+static INLINE void decode_sbuv_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize) - 1, bw = (1 << bwl) / 2;
+  const int bhl = mb_height_log2(bsize) - 1, bh = (1 << bhl) / 2;
+  const int uv_count = bw * bh;
+  int n;
+  for (n = 0; n < uv_count; n++) {
+     const int x_idx = n & (bw - 1);
+     const int y_idx = n >> (bwl - 1);
+     const int uv_offset = (y_idx * 32) * mb->dst.uv_stride + (x_idx * 32);
+     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 1024),
+                                mb->block[16].dequant,
+                                mb->dst.u_buffer + uv_offset,
+                                mb->dst.u_buffer + uv_offset,
+                                mb->dst.uv_stride, mb->dst.uv_stride,
+                                mb->plane[1].eobs[n * 64]);
+     vp9_dequant_idct_add_32x32(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 1024),
+                                mb->block[20].dequant,
+                                mb->dst.v_buffer + uv_offset,
+                                mb->dst.v_buffer + uv_offset,
+                                mb->dst.uv_stride, mb->dst.uv_stride,
+                                mb->plane[2].eobs[n * 64]);
+  }
+}
+
+static INLINE void decode_sby_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bw = 1 << bwl;
+  const int bhl = mb_height_log2(bsize), bh = 1 << bhl;
+  const int y_count = bw * bh;
+  int n;
+
+  for (n = 0; n < y_count; n++) {
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> bwl;
      const int y_offset = (y_idx * 16) * mb->dst.y_stride + (x_idx * 16);
      const TX_TYPE tx_type = get_tx_type_16x16(mb,
-                                (y_idx * (4 * y_size) + x_idx) * 4);
+                                (y_idx * (4 * bw) + x_idx) * 4);
      if (tx_type == DCT_DCT) {
        vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                   mb->block[0].dequant ,
@@ -472,10 +515,19 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
                                    mb->plane[0].eobs[n * 16]);
      }
    }
+}
+
+static INLINE void decode_sbuv_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bw = (1 << bwl) / 2;
+  const int bhl = mb_height_log2(bsize), bh = (1 << bhl) / 2;
+  const int uv_count = bw * bh;
+  int n;
+
+  assert(bsize >= BLOCK_SIZE_SB32X32);
  
    for (n = 0; n < uv_count; n++) {
-    const int x_idx = n % uv_size;
-    const int y_idx = n / uv_size;
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> (bwl - 1);
      const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
      vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
                                 mb->block[16].dequant,
@@ -492,19 +544,19 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
    }
  }
  
-static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) {
-  const int y_count = y_size * y_size;
-  const int uv_size = y_size / 2;
-  const int uv_count = uv_size * uv_size;
+static INLINE void decode_sby_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize)  + 1, bw = 1 << bwl;
+  const int bhl = mb_height_log2(bsize) + 1, bh = 1 << bhl;
+  const int y_count = bw * bh;
    int n;
  
    // luma
    for (n = 0; n < y_count; n++) {
-    const int x_idx = n % y_size;
-    const int y_idx = n / y_size;
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> bwl;
      const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
      const TX_TYPE tx_type = get_tx_type_8x8(xd,
-                                            (y_idx * (2 * y_size) + x_idx) * 2);
+                                            (y_idx * (2 * bw) + x_idx) * 2);
      if (tx_type == DCT_DCT) {
        vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
                                   xd->block[0].dequant,
@@ -522,11 +574,18 @@ static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) {
                                  xd->plane[0].eobs[n * 4]);
      }
    }
+}
+
+static INLINE void decode_sbuv_8x8(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize)  + 1, bw = 1 << (bwl - 1);
+  const int bhl = mb_height_log2(bsize) + 1, bh = 1 << (bhl - 1);
+  const int uv_count = bw * bh;
+  int n;
  
    // chroma
    for (n = 0; n < uv_count; n++) {
-    const int x_idx = n % uv_size;
-    const int y_idx = n / uv_size;
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> (bwl - 1);
      const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
      vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
                                 xd->block[16].dequant,
@@ -543,18 +602,17 @@ static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) {
    }
  }
  
-
-static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) {
-  const int y_count = y_size * y_size;
-  const int uv_size = y_size / 2;
-  const int uv_count = uv_size * uv_size;
+static INLINE void decode_sby_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize)  + 2, bw = 1 << bwl;
+  const int bhl = mb_height_log2(bsize) + 2, bh = 1 << bhl;
+  const int y_count = bw * bh;
    int n;
  
    for (n = 0; n < y_count; n++) {
-    const int x_idx = n % y_size;
-    const int y_idx = n / y_size;
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> bwl;
      const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4);
-    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * y_size + x_idx);
+    const TX_TYPE tx_type = get_tx_type_4x4(xd, n);
      if (tx_type == DCT_DCT) {
        xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
                     xd->block[0].dequant,
@@ -573,10 +631,17 @@ static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) {
                              xd->plane[0].eobs[n]);
      }
    }
+}
+
+static INLINE void decode_sbuv_4x4(MACROBLOCKD *xd, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize)  + 2, bw = 1 << (bwl - 1);
+  const int bhl = mb_height_log2(bsize) + 2, bh = 1 << (bhl - 1);
+  const int uv_count = bw * bh;
+  int n;
  
    for (n = 0; n < uv_count; n++) {
-    const int x_idx = n % uv_size;
-    const int y_idx = n / uv_size;
+    const int x_idx = n & (bw - 1);
+    const int y_idx = n >> (bwl - 1);
      const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
      xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
          xd->block[16].dequant,
@@ -591,14 +656,34 @@ static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) {
    }
  }
  
-static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
-                        BOOL_DECODER* const bc) {
+// TODO(jingning): combine luma and chroma dequantization and inverse
+// transform into a single function looping over planes.
+static void decode_sb_32x32(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
+  decode_sby_32x32(mb, bsize);
+  if (bsize == BLOCK_SIZE_SB64X64)
+    decode_sbuv_32x32(mb, bsize);
+  else
+    decode_sbuv_16x16(mb, bsize);
+}
+
+static void decode_sb_16x16(MACROBLOCKD *mb, BLOCK_SIZE_TYPE bsize) {
+  decode_sby_16x16(mb, bsize);
+  if (bsize >= BLOCK_SIZE_SB32X32)
+    decode_sbuv_16x16(mb, bsize);
+  else
+    decode_sbuv_8x8(mb, bsize);
+}
+
+static void decode_sb(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
+                      BOOL_DECODER* const bc, BLOCK_SIZE_TYPE bsize) {
+  const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize);
+  const int bw = 1 << bwl, bh = 1 << bhl;
    int n, eobtotal;
    VP9_COMMON *const pc = &pbi->common;
    MODE_INFO *mi = xd->mode_info_context;
    const int mis = pc->mode_info_stride;
  
-  assert(mi->mbmi.sb_type == BLOCK_SIZE_SB64X64);
+  assert(mi->mbmi.sb_type == bsize);
  
    if (pbi->common.frame_type != KEY_FRAME)
      vp9_setup_interp_filters(xd, mi->mbmi.interp_filter, pc);
@@ -608,7 +693,7 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
      mb_init_dequantizer(pbi, xd);
  
    if (mi->mbmi.mb_skip_coeff) {
-    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_SB64X64);
+    vp9_reset_sb_tokens_context(xd, bsize);
  
      // Special case:  Force the loopfilter to skip when eobtotal and
      // mb_skip_coeff are zero.
@@ -616,19 +701,32 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
      return;
    }
  
-  // do prediction
-  if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
-    vp9_build_intra_predictors_sb64y_s(xd);
-    vp9_build_intra_predictors_sb64uv_s(xd);
+  // TODO(jingning): need to combine intra/inter predictor functions and
+  // make them block size independent.
+  // generate prediction
+  if (bsize == BLOCK_SIZE_SB64X64) {
+    assert(bsize == BLOCK_SIZE_SB64X64);
+    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+      vp9_build_intra_predictors_sb64y_s(xd);
+      vp9_build_intra_predictors_sb64uv_s(xd);
+    } else {
+      vp9_build_inter64x64_predictors_sb(xd, mb_row, mb_col);
+    }
    } else {
-    vp9_build_inter64x64_predictors_sb(xd, mb_row, mb_col);
+    assert(bsize == BLOCK_SIZE_SB32X32);
+    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) {
+      vp9_build_intra_predictors_sby_s(xd);
+      vp9_build_intra_predictors_sbuv_s(xd);
+    } else {
+      vp9_build_inter32x32_predictors_sb(xd, mb_row, mb_col);
+    }
    }
  
    // dequantization and idct
-  eobtotal = vp9_decode_tokens(pbi, xd, bc, BLOCK_SIZE_SB64X64);
+  eobtotal = vp9_decode_tokens(pbi, xd, bc, bsize);
    if (eobtotal == 0) {  // skip loopfilter
-    for (n = 0; n < 16; n++) {
-      const int x_idx = n & 3, y_idx = n >> 2;
+    for (n = 0; n < bw * bh; n++) {
+      const int x_idx = n & (bw - 1), y_idx = n >> bwl;
  
        if (mb_col + x_idx < pc->mb_cols && mb_row + y_idx < pc->mb_rows)
          mi[y_idx * mis + x_idx].mbmi.mb_skip_coeff = mi->mbmi.mb_skip_coeff;
@@ -636,108 +734,18 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
    } else {
      switch (xd->mode_info_context->mbmi.txfm_size) {
        case TX_32X32:
-        for (n = 0; n < 4; n++) {
-          const int x_idx = n & 1, y_idx = n >> 1;
-          const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32;
-          vp9_dequant_idct_add_32x32(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 1024),
-              xd->block[0].dequant,
-              xd->dst.y_buffer + y_offset,
-              xd->dst.y_buffer + y_offset,
-              xd->dst.y_stride, xd->dst.y_stride, xd->plane[0].eobs[n * 64]);
-        }
-        vp9_dequant_idct_add_32x32(xd->plane[1].qcoeff,
-            xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer,
-            xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[1].eobs[0]);
-        vp9_dequant_idct_add_32x32(xd->plane[2].qcoeff,
-            xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
-            xd->dst.uv_stride, xd->dst.uv_stride, xd->plane[2].eobs[0]);
-        break;
-      case TX_16X16:
-        decode_sb_16x16(xd, 4);
-        break;
-      case TX_8X8:
-        decode_sb_8x8(xd, 8);
-        break;
-      case TX_4X4:
-        decode_sb_4x4(xd, 16);
-        break;
-      default: assert(0);
-    }
-  }
-#if CONFIG_CODE_NONZEROCOUNT
-  propagate_nzcs(&pbi->common, xd);
-#endif
-}
-
-static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
-                        BOOL_DECODER* const bc) {
-  int eobtotal;
-  VP9_COMMON *const pc = &pbi->common;
-  MODE_INFO *mi = xd->mode_info_context;
-  const int mis = pc->mode_info_stride;
-
-  assert(mi->mbmi.sb_type == BLOCK_SIZE_SB32X32);
-
-  if (pbi->common.frame_type != KEY_FRAME)
-    vp9_setup_interp_filters(xd, mi->mbmi.interp_filter, pc);
-
-  // re-initialize macroblock dequantizer before detokenization
-  if (xd->segmentation_enabled)
-    mb_init_dequantizer(pbi, xd);
-
-  if (mi->mbmi.mb_skip_coeff) {
-    vp9_reset_sb_tokens_context(xd, BLOCK_SIZE_SB32X32);
-
-    // Special case:  Force the loopfilter to skip when eobtotal and
-    // mb_skip_coeff are zero.
-    skip_recon_mb(pbi, xd, mb_row, mb_col);
-    return;
-  }
-
-
-  // do prediction
-  if (mi->mbmi.ref_frame == INTRA_FRAME) {
-    vp9_build_intra_predictors_sby_s(xd);
-    vp9_build_intra_predictors_sbuv_s(xd);
-  } else {
-    vp9_build_inter32x32_predictors_sb(xd, mb_row, mb_col);
-  }
-
-  // dequantization and idct
-  eobtotal = vp9_decode_tokens(pbi, xd, bc, BLOCK_SIZE_SB32X32);
-  if (eobtotal == 0) {  // skip loopfilter
-    mi->mbmi.mb_skip_coeff = 1;
-    if (mb_col + 1 < pc->mb_cols)
-      mi[1].mbmi.mb_skip_coeff = 1;
-    if (mb_row + 1 < pc->mb_rows) {
-      mi[mis].mbmi.mb_skip_coeff = 1;
-      if (mb_col + 1 < pc->mb_cols)
-        mi[mis + 1].mbmi.mb_skip_coeff = 1;
-    }
-  } else {
-    switch (xd->mode_info_context->mbmi.txfm_size) {
-      case TX_32X32:
-        vp9_dequant_idct_add_32x32(xd->plane[0].qcoeff, xd->block[0].dequant,
-                                   xd->dst.y_buffer, xd->dst.y_buffer,
-                                   xd->dst.y_stride, xd->dst.y_stride,
-                                   xd->plane[0].eobs[0]);
-        vp9_dequant_idct_add_16x16(xd->plane[1].qcoeff, xd->block[16].dequant,
-                                   xd->dst.u_buffer, xd->dst.u_buffer,
-                                   xd->dst.uv_stride, xd->dst.uv_stride,
-                                   xd->plane[1].eobs[0]);
-        vp9_dequant_idct_add_16x16(xd->plane[2].qcoeff, xd->block[16].dequant,
-                                   xd->dst.v_buffer, xd->dst.v_buffer,
-                                   xd->dst.uv_stride, xd->dst.uv_stride,
-                                   xd->plane[2].eobs[0]);
+        decode_sb_32x32(xd, bsize);
          break;
        case TX_16X16:
-        decode_sb_16x16(xd, 2);
+        decode_sb_16x16(xd, bsize);
          break;
        case TX_8X8:
-        decode_sb_8x8(xd, 4);
+        decode_sby_8x8(xd, bsize);
+        decode_sbuv_8x8(xd, bsize);
          break;
        case TX_4X4:
-        decode_sb_4x4(xd, 8);
+        decode_sby_4x4(xd, bsize);
+        decode_sbuv_4x4(xd, bsize);
          break;
        default: assert(0);
      }
@@ -747,6 +755,8 @@ static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
  #endif
  }
  
+// TODO(jingning): Need to merge SB and MB decoding. The MB decoding currently
+// couples special handles on I8x8, B_PRED, and splitmv modes.
  static void decode_mb(VP9D_COMP *pbi, MACROBLOCKD *xd,
                       int mb_row, int mb_col,
                       BOOL_DECODER* const bc) {
@@ -943,7 +953,7 @@ static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) {
        set_offsets(pbi, 64, mb_row, mb_col);
        vp9_decode_mb_mode_mv(pbi, xd, mb_row, mb_col, r);
        set_refs(pbi, 64, mb_row, mb_col);
-      decode_sb64(pbi, xd, mb_row, mb_col, r);
+      decode_sb(pbi, xd, mb_row, mb_col, r, BLOCK_SIZE_SB64X64);
        xd->corrupted |= bool_error(r);
      } else {
        // not SB64
@@ -962,7 +972,7 @@ static void decode_sb_row(VP9D_COMP *pbi, int mb_row, vp9_reader* r) {
            set_offsets(pbi, 32, y_idx_sb, x_idx_sb);
            vp9_decode_mb_mode_mv(pbi, xd, y_idx_sb, x_idx_sb, r);
            set_refs(pbi, 32, y_idx_sb, x_idx_sb);
-          decode_sb32(pbi, xd, y_idx_sb, x_idx_sb, r);
+          decode_sb(pbi, xd, y_idx_sb, x_idx_sb, r, BLOCK_SIZE_SB32X32);
            xd->corrupted |= bool_error(r);
          } else {
            // not SB32
author	Jingning Han <jingning@google.com>
	Wed, 10 Apr 2013 19:30:20 +0000 (12:30 -0700)
committer	Jingning Han <jingning@google.com>
	Wed, 10 Apr 2013 22:54:43 +0000 (15:54 -0700)