Move qcoeff, dqcoeff from BLOCKD to per-plane data

author John Koleszar <jkoleszar@google.com>
Tue, 2 Apr 2013 21:50:40 +0000 (14:50 -0700)

committer John Koleszar <jkoleszar@google.com>
Thu, 4 Apr 2013 23:30:57 +0000 (16:30 -0700)

diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index 016244b..6fdc021 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -260,8 +260,6 @@ typedef struct {
  } MODE_INFO;
  
  typedef struct blockd {
-  int16_t *qcoeff;
-  int16_t *dqcoeff;
    uint8_t *predictor;
    int16_t *diff;
    int16_t *dequant;
@@ -295,15 +293,28 @@ struct scale_factors {
  #endif
  };
  
+enum { MAX_MB_PLANE = 3 };
+
+struct mb_plane {
+  DECLARE_ALIGNED(16, int16_t,  qcoeff[64 * 64]);
+  DECLARE_ALIGNED(16, int16_t,  dqcoeff[64 * 64]);
+};
+
+#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n))
+
+#define MB_SUBBLOCK_FIELD(x, field, i) (\
+  ((i) < 16) ? BLOCK_OFFSET((x)->plane[0].field, (i), 16) : \
+  ((i) < 20) ? BLOCK_OFFSET((x)->plane[1].field, ((i) - 16), 16) : \
+  BLOCK_OFFSET((x)->plane[2].field, ((i) - 20), 16))
+
  typedef struct macroblockd {
    DECLARE_ALIGNED(16, int16_t,  diff[64*64+32*32*2]);      /* from idct diff */
    DECLARE_ALIGNED(16, uint8_t,  predictor[384]);  // unused for superblocks
-  DECLARE_ALIGNED(16, int16_t,  qcoeff[64*64+32*32*2]);
-  DECLARE_ALIGNED(16, int16_t,  dqcoeff[64*64+32*32*2]);
    DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]);
  #if CONFIG_CODE_NONZEROCOUNT
    DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]);
  #endif
+  struct mb_plane plane[MAX_MB_PLANE];
  
    /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */
    BLOCKD block[24];
@@ -384,8 +395,8 @@ typedef struct macroblockd {
    void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
      uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd);
    void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
-    uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
-    struct macroblockd *xd);
+    uint8_t *pre, uint8_t *dst, int stride,
+    uint16_t *eobs);
  
    struct subpix_fn_table  subpix;
  
@@ -681,4 +692,34 @@ static int get_nzc_used(TX_SIZE tx_size) {
    return (tx_size >= TX_16X16);
  }
  #endif
+
+struct plane_block_idx {
+  int plane;
+  int block;
+};
+
+// TODO(jkoleszar): returning a struct so it can be used in a const context,
+// expect to refactor this further later.
+static INLINE struct plane_block_idx plane_block_idx(MACROBLOCKD *xd,
+                                                     int b_idx) {
+  const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type;
+  const int u_offset = 16 << (sb_type * 2);
+  const int v_offset = 20 << (sb_type * 2);
+  struct plane_block_idx res;
+
+  if (b_idx < u_offset) {
+    res.plane = 0;
+    res.block = b_idx;
+  } else if (b_idx < v_offset) {
+    res.plane = 1;
+    res.block = b_idx - u_offset;
+  } else {
+    assert(b_idx < (24 << (sb_type * 2)));
+    res.plane = 2;
+    res.block = b_idx - v_offset;
+  }
+  return res;
+}
+
+
  #endif  // VP9_COMMON_VP9_BLOCKD_H_
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index a03a66e..3c32733 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -26,9 +26,11 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
    for (i = 0; i < 16; i++) {
      TX_TYPE tx_type = get_tx_type_4x4(xd, i);
      if (tx_type != DCT_DCT) {
-      vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
+      vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
+                       xd->block[i].diff, 16, tx_type);
      } else {
-      vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
+      vp9_inverse_transform_b_4x4(xd, xd->eobs[i],
+                                  BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
                                    xd->block[i].diff, 32);
      }
    }
@@ -37,8 +39,14 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
  void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) {
    int i;
  
-  for (i = 16; i < 24; i++) {
-    vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff,
+  for (i = 16; i < 20; i++) {
+    vp9_inverse_transform_b_4x4(xd, xd->eobs[i],
+                                BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
+                                xd->block[i].diff, 16);
+  }
+  for (i = 20; i < 24; i++) {
+    vp9_inverse_transform_b_4x4(xd, xd->eobs[i],
+                                BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
                                  xd->block[i].diff, 16);
    }
  }
@@ -60,19 +68,20 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) {
    for (i = 0; i < 9; i += 8) {
      TX_TYPE tx_type = get_tx_type_8x8(xd, i);
      if (tx_type != DCT_DCT) {
-      vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type);
+      vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
+                       xd->block[i].diff, 16, tx_type);
      } else {
-      vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
+      vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16),
                                    &blockd[i].diff[0], 32);
      }
    }
    for (i = 2; i < 11; i += 8) {
      TX_TYPE tx_type = get_tx_type_8x8(xd, i);
      if (tx_type != DCT_DCT) {
-      vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff,
-                           16, tx_type);
+      vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16),
+                       xd->block[i].diff, 16, tx_type);
      } else {
-      vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
+      vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16),
                                    &blockd[i].diff[0], 32);
      }
    }
@@ -82,8 +91,12 @@ void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) {
    int i;
    BLOCKD *blockd = xd->block;
  
-  for (i = 16; i < 24; i += 4) {
-    vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
+  for (i = 16; i < 20; i += 4) {
+    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
+                                &blockd[i].diff[0], 16);
+  }
+  for (i = 20; i < 24; i += 4) {
+    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
                                  &blockd[i].diff[0], 16);
    }
  }
@@ -102,9 +115,10 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) {
    BLOCKD *bd = &xd->block[0];
    TX_TYPE tx_type = get_tx_type_16x16(xd, 0);
    if (tx_type != DCT_DCT) {
-    vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type);
+    vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16),
+                       bd->diff, 16, tx_type);
    } else {
-    vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
+    vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16),
                                    &xd->block[0].diff[0], 32);
    }
  }
@@ -115,7 +129,7 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) {
  }
  
  void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) {
-  vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64);
+  vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), xd->diff, 64);
  }
  
  void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
@@ -126,11 +140,11 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) {
      const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4);
  
      if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+      vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
                                      xd->diff + x_idx * 16 + y_idx * 32 * 16,
                                      64);
      } else {
-      vp9_short_iht16x16(xd->dqcoeff + n * 256,
+      vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
                           xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type);
      }
    }
@@ -144,10 +158,10 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) {
      const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2);
  
      if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+      vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
                                    xd->diff + x_idx * 8 + y_idx * 32 * 8, 64);
      } else {
-      vp9_short_iht8x8(xd->dqcoeff + n * 64,
+      vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
                         xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type);
      }
    }
@@ -161,19 +175,20 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) {
      const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx);
  
      if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+      vp9_inverse_transform_b_4x4(xd, xd->eobs[n],
+                                  BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
                                    xd->diff + x_idx * 4 + y_idx * 4 * 32, 64);
      } else {
-      vp9_short_iht4x4(xd->dqcoeff + n * 16,
+      vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
                         xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type);
      }
    }
  }
  
  void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) {
-  vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024,
+  vp9_inverse_transform_b_16x16(xd->plane[1].dqcoeff,
                                  xd->diff + 1024, 32);
-  vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280,
+  vp9_inverse_transform_b_16x16(xd->plane[2].dqcoeff,
                                  xd->diff + 1280, 32);
  }
  
@@ -183,10 +198,10 @@ void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) {
    for (n = 0; n < 4; n++) {
      const int x_idx = n & 1, y_idx = n >> 1;
  
-    vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64,
+    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
                                  xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8,
                                  32);
-    vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64,
+    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
                                  xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8,
                                  32);
    }
@@ -199,11 +214,11 @@ void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) {
      const int x_idx = n & 3, y_idx = n >> 2;
  
      vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n],
-                                xd->dqcoeff + 1024 + n * 16,
+                                BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
                                  xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4,
                                  32);
      vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n],
-                                xd->dqcoeff + 1280 + n * 16,
+                                BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
                                  xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4,
                                  32);
    }
@@ -215,7 +230,7 @@ void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) {
    for (n = 0; n < 4; n++) {
      const int x_idx = n & 1, y_idx = n >> 1;
  
-    vp9_short_idct32x32(xd->dqcoeff + n * 1024,
+    vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024),
                          xd->diff + x_idx * 32 + y_idx * 32 * 64, 128);
    }
  }
@@ -228,11 +243,11 @@ void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) {
      const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4);
  
      if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256,
+      vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
                                      xd->diff + x_idx * 16 + y_idx * 64 * 16,
                                      128);
      } else {
-      vp9_short_iht16x16(xd->dqcoeff + n * 256,
+      vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256),
                           xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type);
      }
    }
@@ -246,10 +261,10 @@ void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) {
      const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2);
  
      if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64,
+      vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
                                    xd->diff + x_idx * 8 + y_idx * 64 * 8, 128);
      } else {
-      vp9_short_iht8x8(xd->dqcoeff + n * 64,
+      vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64),
                         xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type);
      }
    }
@@ -263,19 +278,20 @@ void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) {
      const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx);
  
      if (tx_type == DCT_DCT) {
-      vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16,
+      vp9_inverse_transform_b_4x4(xd, xd->eobs[n],
+                                  BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
                                    xd->diff + x_idx * 4 + y_idx * 4 * 64, 128);
      } else {
-      vp9_short_iht4x4(xd->dqcoeff + n * 16,
+      vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16),
                         xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type);
      }
    }
  }
  
  void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) {
-  vp9_short_idct32x32(xd->dqcoeff + 4096,
+  vp9_short_idct32x32(xd->plane[1].dqcoeff,
                        xd->diff + 4096, 64);
-  vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024,
+  vp9_short_idct32x32(xd->plane[2].dqcoeff,
                        xd->diff + 4096 + 1024, 64);
  }
  
@@ -285,9 +301,9 @@ void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) {
    for (n = 0; n < 4; n++) {
      const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16;
  
-    vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256,
+    vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256),
                                    xd->diff + 4096 + off, 64);
-    vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256,
+    vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256),
                                    xd->diff + 4096 + 1024 + off, 64);
    }
  }
@@ -298,9 +314,9 @@ void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) {
    for (n = 0; n < 16; n++) {
      const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8;
  
-    vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64,
+    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64),
                                  xd->diff + 4096 + off, 64);
-    vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64,
+    vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64),
                                  xd->diff + 4096 + 1024 + off, 64);
    }
  }
@@ -312,10 +328,10 @@ void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) {
      const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4;
  
      vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n],
-                                xd->dqcoeff + 4096 + n * 16,
+                                BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16),
                                  xd->diff + 4096 + off, 64);
      vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n],
-                                xd->dqcoeff + 4096 + 1024 + n * 16,
+                                BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16),
                                  xd->diff + 4096 + 1024 + off, 64);
    }
  }
diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c
index 85ba82d..b357c9a 100644
--- a/vp9/common/vp9_mbpitch.c
+++ b/vp9/common/vp9_mbpitch.c
@@ -99,11 +99,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) {
        blockd[to].predictor = &mb->predictor[from];
      }
    }
-
-  for (r = 0; r < 24; r++) {
-    blockd[r].qcoeff  = &mb->qcoeff[r * 16];
-    blockd[r].dqcoeff = &mb->dqcoeff[r * 16];
-  }
  }
  
  void vp9_build_block_doffsets(MACROBLOCKD *mb) {
diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh
index 8b6efc3..cf95524 100644
--- a/vp9/common/vp9_rtcd_defs.sh
+++ b/vp9/common/vp9_rtcd_defs.sh
@@ -29,9 +29,6 @@ forward_decls vp9_common_forward_decls
  prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd"
  specialize vp9_dequant_idct_add_y_block_8x8
  
-prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
-specialize vp9_dequant_idct_add_uv_block_8x8
-
  prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob"
  specialize vp9_dequant_idct_add_16x16
  
@@ -44,15 +41,12 @@ specialize vp9_dequant_idct_add
  prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd"
  specialize vp9_dequant_idct_add_y_block
  
-prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
+prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs"
  specialize vp9_dequant_idct_add_uv_block
  
  prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob"
  specialize vp9_dequant_idct_add_32x32
  
-prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd"
-specialize vp9_dequant_idct_add_uv_block_16x16
-
  #
  # RECON
  #
@@ -606,8 +600,7 @@ prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
  specialize vp9_subtract_b mmx sse2
  
  prototype int vp9_mbuverror "struct macroblock *mb"
-specialize vp9_mbuverror mmx sse2
-vp9_mbuverror_sse2=vp9_mbuverror_xmm
+specialize vp9_mbuverror
  
  prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch"
  specialize vp9_subtract_b mmx sse2
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index 7d71ceb..3cefd8f 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -245,19 +245,23 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd,
    }
  #endif
    if (tx_type != DCT_DCT) {
-    vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
+    vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->plane[0].qcoeff,
                                      xd->block[0].dequant, xd->predictor,
                                      xd->dst.y_buffer, 16, xd->dst.y_stride,
                                      xd->eobs[0]);
    } else {
-    vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
+    vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant,
                                 xd->predictor, xd->dst.y_buffer,
                                 16, xd->dst.y_stride, xd->eobs[0]);
    }
-  vp9_dequant_idct_add_uv_block_8x8(
-      xd->qcoeff + 16 * 16, xd->block[16].dequant,
-      xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
-      xd->dst.uv_stride, xd);
+
+  vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
+                           xd->predictor + 16 * 16, xd->dst.u_buffer, 8,
+                           xd->dst.uv_stride, xd->eobs[16]);
+
+  vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
+                           xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8,
+                           xd->dst.uv_stride, xd->eobs[20]);
  }
  
  static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
@@ -281,7 +285,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
      for (i = 0; i < 4; i++) {
        int ib = vp9_i8x8_block[i];
        int idx = (ib & 0x02) ? (ib + 2) : ib;
-      int16_t *q  = xd->block[idx].qcoeff;
+      int16_t *q  = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16);
        int16_t *dq = xd->block[0].dequant;
        uint8_t *pre = xd->block[ib].predictor;
        uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst;
@@ -301,7 +305,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
        }
      }
    } else {
-    vp9_dequant_idct_add_y_block_8x8(xd->qcoeff,
+    vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff,
                                       xd->block[0].dequant,
                                       xd->predictor,
                                       xd->dst.y_buffer,
@@ -319,23 +323,31 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
  
        b = &xd->block[16 + i];
        vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
+                   b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
  
        b = &xd->block[20 + i];
        vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
+                   b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
      }
    } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
-    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
-         xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
-         xd->dst.uv_stride, xd);
+    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
+         xd->predictor + 16 * 16, xd->dst.u_buffer,
+         xd->dst.uv_stride, xd->eobs + 16);
+    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
+         xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
+         xd->dst.uv_stride, xd->eobs + 20);
    } else {
-    vp9_dequant_idct_add_uv_block_8x8
-        (xd->qcoeff + 16 * 16, xd->block[16].dequant,
-         xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
-         xd->dst.uv_stride, xd);
+    vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant,
+                             xd->predictor + 16 * 16, xd->dst.u_buffer, 8,
+                             xd->dst.uv_stride, xd->eobs[16]);
+
+    vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant,
+                             xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8,
+                             xd->dst.uv_stride, xd->eobs[20]);
    }
  #if 0  // def DEC_DEBUG
    if (dec_debug) {
@@ -378,23 +390,27 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
          b = &xd->block[ib + iblock[j]];
          tx_type = get_tx_type_4x4(xd, ib + iblock[j]);
          if (tx_type != DCT_DCT) {
-          vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+          vp9_ht_dequant_idct_add_c(tx_type,
+              BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
                                      b->dequant, b->predictor,
                                      *(b->base_dst) + b->dst, 16,
                                      b->dst_stride, xd->eobs[ib + iblock[j]]);
          } else {
-          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+          xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16),
+                       b->dequant, b->predictor,
                         *(b->base_dst) + b->dst, 16, b->dst_stride,
                         xd->eobs[ib + iblock[j]]);
          }
        }
        b = &xd->block[16 + i];
        vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16),
+                   b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]);
        b = &xd->block[20 + i];
        vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16),
+                   b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]);
      }
    } else if (mode == B_PRED) {
@@ -410,12 +426,14 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
        vp9_intra4x4_predict(xd, b, b_mode, b->predictor);
        tx_type = get_tx_type_4x4(xd, i);
        if (tx_type != DCT_DCT) {
-        vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+        vp9_ht_dequant_idct_add_c(tx_type,
+                                  BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
                                    b->dequant, b->predictor,
                                    *(b->base_dst) + b->dst, 16, b->dst_stride,
                                    xd->eobs[i]);
        } else {
-        xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+        xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
+                     b->dequant, b->predictor,
                        *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
        }
      }
@@ -424,27 +442,25 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
        vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc);
  #endif
      vp9_build_intra_predictors_mbuv(xd);
-    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
-                           xd->block[16].dequant,
-                           xd->predictor + 16 * 16,
-                           xd->dst.u_buffer,
-                           xd->dst.v_buffer,
-                           xd->dst.uv_stride,
-                           xd);
+    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
+         xd->predictor + 16 * 16, xd->dst.u_buffer,
+         xd->dst.uv_stride, xd->eobs + 16);
+    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
+         xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
+         xd->dst.uv_stride, xd->eobs + 20);
    } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) {
-    xd->itxm_add_y_block(xd->qcoeff,
+    xd->itxm_add_y_block(xd->plane[0].qcoeff,
                            xd->block[0].dequant,
                            xd->predictor,
                            xd->dst.y_buffer,
                            xd->dst.y_stride,
                            xd);
-    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
-                           xd->block[16].dequant,
-                           xd->predictor + 16 * 16,
-                           xd->dst.u_buffer,
-                           xd->dst.v_buffer,
-                           xd->dst.uv_stride,
-                           xd);
+    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
+         xd->predictor + 16 * 16, xd->dst.u_buffer,
+         xd->dst.uv_stride, xd->eobs + 16);
+    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
+         xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
+         xd->dst.uv_stride, xd->eobs + 20);
    } else {
  #if 0  // def DEC_DEBUG
      if (dec_debug) {
@@ -467,22 +483,23 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
        BLOCKD *b = &xd->block[i];
        tx_type = get_tx_type_4x4(xd, i);
        if (tx_type != DCT_DCT) {
-        vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
+        vp9_ht_dequant_idct_add_c(tx_type,
+                                  BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
                                    b->dequant, b->predictor,
                                    *(b->base_dst) + b->dst, 16,
                                    b->dst_stride, xd->eobs[i]);
        } else {
-        xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
+        xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
+                     b->dequant, b->predictor,
                        *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]);
        }
      }
-    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
-                          xd->block[16].dequant,
-                          xd->predictor + 16 * 16,
-                          xd->dst.u_buffer,
-                          xd->dst.v_buffer,
-                          xd->dst.uv_stride,
-                          xd);
+    xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant,
+                          xd->predictor + 16 * 16, xd->dst.u_buffer,
+                          xd->dst.uv_stride, xd->eobs + 16);
+    xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant,
+                          xd->predictor + 16 * 16 + 64, xd->dst.v_buffer,
+                          xd->dst.uv_stride, xd->eobs + 20);
    }
  }
  
@@ -491,8 +508,6 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
    const int uv_size = y_size / 2;
    const int uv_count = uv_size * uv_size;
  
-  const int u_qcoeff_offset = (16 * 16) * y_count;
-  const int v_qcoeff_offset = u_qcoeff_offset + (16 * 16) * uv_count;
    const int u_eob_offset = 16 * y_count;
    const int v_eob_offset = u_eob_offset + 16 * uv_count;
    int n;
@@ -504,7 +519,7 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
      const TX_TYPE tx_type = get_tx_type_16x16(mb,
                                  (y_idx * (4 * y_size) + x_idx) * 4);
      if (tx_type == DCT_DCT) {
-      vp9_dequant_idct_add_16x16(mb->qcoeff + n * 16 * 16,
+      vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                   mb->block[0].dequant ,
                                   mb->dst.y_buffer + y_offset,
                                   mb->dst.y_buffer + y_offset,
@@ -512,7 +527,7 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
                                   mb->eobs[n * 16]);
      } else {
        vp9_ht_dequant_idct_add_16x16_c(tx_type,
-                                      mb->qcoeff + n * 16 * 16,
+                                      BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256),
                                        mb->block[0].dequant,
                                        mb->dst.y_buffer + y_offset,
                                        mb->dst.y_buffer + y_offset,
@@ -525,13 +540,13 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
      const int x_idx = n % uv_size;
      const int y_idx = n / uv_size;
      const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16);
-    vp9_dequant_idct_add_16x16(mb->qcoeff + u_qcoeff_offset + n * 16 * 16,
+    vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256),
                                 mb->block[16].dequant,
                                 mb->dst.u_buffer + uv_offset,
                                 mb->dst.u_buffer + uv_offset,
                                 mb->dst.uv_stride, mb->dst.uv_stride,
                                 mb->eobs[u_eob_offset + n * 16]);
-    vp9_dequant_idct_add_16x16(mb->qcoeff + v_qcoeff_offset + n * 16 * 16,
+    vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256),
                                 mb->block[20].dequant,
                                 mb->dst.v_buffer + uv_offset,
                                 mb->dst.v_buffer + uv_offset,
@@ -540,13 +555,11 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) {
    }
  }
  
-static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) {
+static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) {
    const int y_count = y_size * y_size;
    const int uv_size = y_size / 2;
    const int uv_count = uv_size * uv_size;
  
-  const int u_qcoeff_offset = (8 * 8) * y_count;
-  const int v_qcoeff_offset = u_qcoeff_offset + (8 * 8) * uv_count;
    const int u_eob_offset = 4 * y_count;
    const int v_eob_offset = u_eob_offset + 4 * uv_count;
    int n;
@@ -555,24 +568,24 @@ static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) {
    for (n = 0; n < y_count; n++) {
      const int x_idx = n % y_size;
      const int y_idx = n / y_size;
-    const int y_offset = (y_idx * 8) * mb->dst.y_stride + (x_idx * 8);
-    const TX_TYPE tx_type = get_tx_type_8x8(mb,
+    const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8);
+    const TX_TYPE tx_type = get_tx_type_8x8(xd,
                                              (y_idx * (2 * y_size) + x_idx) * 2);
      if (tx_type == DCT_DCT) {
-      vp9_dequant_idct_add_8x8_c(mb->qcoeff + n * 8 * 8,
-                                 mb->block[0].dequant,
-                                 mb->dst.y_buffer + y_offset,
-                                 mb->dst.y_buffer + y_offset,
-                                 mb->dst.y_stride, mb->dst.y_stride,
-                                 mb->eobs[n * 4]);
+      vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
+                                 xd->block[0].dequant,
+                                 xd->dst.y_buffer + y_offset,
+                                 xd->dst.y_buffer + y_offset,
+                                 xd->dst.y_stride, xd->dst.y_stride,
+                                 xd->eobs[n * 4]);
      } else {
        vp9_ht_dequant_idct_add_8x8_c(tx_type,
-                                    mb->qcoeff + n * 8 * 8,
-                                    mb->block[0].dequant,
-                                    mb->dst.y_buffer + y_offset,
-                                    mb->dst.y_buffer + y_offset,
-                                    mb->dst.y_stride, mb->dst.y_stride,
-                                    mb->eobs[n * 4]);
+                                    BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64),
+                                    xd->block[0].dequant,
+                                    xd->dst.y_buffer + y_offset,
+                                    xd->dst.y_buffer + y_offset,
+                                    xd->dst.y_stride, xd->dst.y_stride,
+                                    xd->eobs[n * 4]);
      }
    }
  
@@ -580,30 +593,28 @@ static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) {
    for (n = 0; n < uv_count; n++) {
      const int x_idx = n % uv_size;
      const int y_idx = n / uv_size;
-    const int uv_offset = (y_idx * 8) * mb->dst.uv_stride + (x_idx * 8);
-    vp9_dequant_idct_add_8x8_c(mb->qcoeff + u_qcoeff_offset + n * 8 * 8,
-                               mb->block[16].dequant,
-                               mb->dst.u_buffer + uv_offset,
-                               mb->dst.u_buffer + uv_offset,
-                               mb->dst.uv_stride, mb->dst.uv_stride,
-                               mb->eobs[u_eob_offset + n * 4]);
-    vp9_dequant_idct_add_8x8_c(mb->qcoeff + v_qcoeff_offset + n * 8 * 8,
-                               mb->block[20].dequant,
-                               mb->dst.v_buffer + uv_offset,
-                               mb->dst.v_buffer + uv_offset,
-                               mb->dst.uv_stride, mb->dst.uv_stride,
-                               mb->eobs[v_eob_offset + n * 4]);
+    const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8);
+    vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64),
+                               xd->block[16].dequant,
+                               xd->dst.u_buffer + uv_offset,
+                               xd->dst.u_buffer + uv_offset,
+                               xd->dst.uv_stride, xd->dst.uv_stride,
+                               xd->eobs[u_eob_offset + n * 4]);
+    vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64),
+                               xd->block[20].dequant,
+                               xd->dst.v_buffer + uv_offset,
+                               xd->dst.v_buffer + uv_offset,
+                               xd->dst.uv_stride, xd->dst.uv_stride,
+                               xd->eobs[v_eob_offset + n * 4]);
    }
  }
  
  
-static void decode_sb_4x4(MACROBLOCKD *mb, int y_size) {
+static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) {
    const int y_count = y_size * y_size;
    const int uv_size = y_size / 2;
    const int uv_count = uv_size * uv_size;
  
-  const int u_qcoeff_offset = (4 * 4) * y_count;
-  const int v_qcoeff_offset = u_qcoeff_offset + (4 * 4) * uv_count;
    const int u_eob_offset = y_count;
    const int v_eob_offset = u_eob_offset + uv_count;
    int n;
@@ -611,42 +622,41 @@ static void decode_sb_4x4(MACROBLOCKD *mb, int y_size) {
    for (n = 0; n < y_count; n++) {
      const int x_idx = n % y_size;
      const int y_idx = n / y_size;
-    const int y_offset = (y_idx * 4) * mb->dst.y_stride + (x_idx * 4);
-    const TX_TYPE tx_type = get_tx_type_4x4(mb, y_idx * y_size + x_idx);
+    const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4);
+    const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * y_size + x_idx);
      if (tx_type == DCT_DCT) {
-      mb->itxm_add(mb->qcoeff + n * 4 * 4,
-                   mb->block[0].dequant,
-                   mb->dst.y_buffer + y_offset,
-                   mb->dst.y_buffer + y_offset,
-                   mb->dst.y_stride, mb->dst.y_stride,
-                   mb->eobs[n]);
+      xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
+                   xd->block[0].dequant,
+                   xd->dst.y_buffer + y_offset,
+                   xd->dst.y_buffer + y_offset,
+                   xd->dst.y_stride, xd->dst.y_stride,
+                   xd->eobs[n]);
      } else {
        vp9_ht_dequant_idct_add_c(tx_type,
-                                mb->qcoeff + n * 4 * 4,
-                                mb->block[0].dequant,
-                                mb->dst.y_buffer + y_offset,
-                                mb->dst.y_buffer + y_offset,
-                                mb->dst.y_stride, mb->dst.y_stride,
-                                mb->eobs[n]);
+                                BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16),
+                                xd->block[0].dequant,
+                                xd->dst.y_buffer + y_offset,
+                                xd->dst.y_buffer + y_offset,
+                                xd->dst.y_stride,
+                                xd->dst.y_stride,
+                                xd->eobs[n]);
      }
    }
  
    for (n = 0; n < uv_count; n++) {
      const int x_idx = n % uv_size;
      const int y_idx = n / uv_size;
-    const int uv_offset = (y_idx * 4) * mb->dst.uv_stride + (x_idx * 4);
-    mb->itxm_add(mb->qcoeff + u_qcoeff_offset + n * 4 * 4,
-                 mb->block[16].dequant,
-                 mb->dst.u_buffer + uv_offset,
-                 mb->dst.u_buffer + uv_offset,
-                 mb->dst.uv_stride, mb->dst.uv_stride,
-                 mb->eobs[u_eob_offset + n]);
-    mb->itxm_add(mb->qcoeff + v_qcoeff_offset + n * 4 * 4,
-                 mb->block[20].dequant,
-                 mb->dst.v_buffer + uv_offset,
-                 mb->dst.v_buffer + uv_offset,
-                 mb->dst.uv_stride, mb->dst.uv_stride,
-                 mb->eobs[v_eob_offset + n]);
+    const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4);
+    xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16),
+        xd->block[16].dequant,
+        xd->dst.u_buffer + uv_offset,
+        xd->dst.u_buffer + uv_offset,
+        xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[u_eob_offset + n]);
+    xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16),
+        xd->block[20].dequant,
+        xd->dst.v_buffer + uv_offset,
+        xd->dst.v_buffer + uv_offset,
+        xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[v_eob_offset + n]);
    }
  }
  
@@ -698,16 +708,16 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
          for (n = 0; n < 4; n++) {
            const int x_idx = n & 1, y_idx = n >> 1;
            const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32;
-          vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024,
+          vp9_dequant_idct_add_32x32(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 1024),
                xd->block[0].dequant,
                xd->dst.y_buffer + y_offset,
                xd->dst.y_buffer + y_offset,
                xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]);
          }
-        vp9_dequant_idct_add_32x32(xd->qcoeff + 4096,
+        vp9_dequant_idct_add_32x32(xd->plane[1].qcoeff,
              xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer,
              xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]);
-        vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024,
+        vp9_dequant_idct_add_32x32(xd->plane[2].qcoeff,
              xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer,
              xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]);
          break;
@@ -776,15 +786,18 @@ static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col,
    } else {
      switch (xd->mode_info_context->mbmi.txfm_size) {
        case TX_32X32:
-        vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant,
+        vp9_dequant_idct_add_32x32(xd->plane[0].qcoeff, xd->block[0].dequant,
                                     xd->dst.y_buffer, xd->dst.y_buffer,
                                     xd->dst.y_stride, xd->dst.y_stride,
                                     xd->eobs[0]);
-        vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024,
-                                              xd->block[16].dequant,
-                                              xd->dst.u_buffer,
-                                              xd->dst.v_buffer,
-                                              xd->dst.uv_stride, xd);
+        vp9_dequant_idct_add_16x16(xd->plane[1].qcoeff, xd->block[16].dequant,
+                                   xd->dst.u_buffer, xd->dst.u_buffer,
+                                   xd->dst.uv_stride, xd->dst.uv_stride,
+                                   xd->eobs[64]);
+        vp9_dequant_idct_add_16x16(xd->plane[2].qcoeff, xd->block[16].dequant,
+                                   xd->dst.v_buffer, xd->dst.v_buffer,
+                                   xd->dst.uv_stride, xd->dst.uv_stride,
+                                   xd->eobs[80]);
          break;
        case TX_16X16:
          decode_sb_16x16(xd, 2);
@@ -1857,7 +1870,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) {
    vp9_build_block_doffsets(xd);
  
    // clear out the coeff buffer
-  vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
+  vpx_memset(xd->plane[0].qcoeff, 0, sizeof(xd->plane[0].qcoeff));
+  vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff));
+  vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff));
  
    // Read the mb_no_coeff_skip flag
    pc->mb_no_coeff_skip = vp9_read_bit(&header_bc);
diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c

index 9aebcdc..c0d1e2a 100644 (file)
--- a/vp9/decoder/vp9_dequantize.c
+++ b/vp9/decoder/vp9_dequantize.c
@@ -388,14 +388,3 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq,
      }
    }
  }
-
-void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq,
-                                           uint8_t *dstu,
-                                           uint8_t *dstv,
-                                           int stride,
-                                           MACROBLOCKD *xd) {
-  vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride,
-                               xd->eobs[64]);
-  vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride,
-                               xd->eobs[80]);
-}
diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h

index 933108d..bb72bb2 100644 (file)
--- a/vp9/decoder/vp9_dequantize.h
+++ b/vp9/decoder/vp9_dequantize.h
@@ -40,10 +40,9 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
  
  void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
                                                unsigned char *pre,
-                                              unsigned char *dst_u,
-                                              unsigned char *dst_v,
+                                              unsigned char *dst,
                                                int stride,
-                                              struct macroblockd *xd);
+                                              uint16_t *eobs);
  
  void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
                                      unsigned char *pred, unsigned char *dest,
diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c

index e558263..7801c08 100644 (file)
--- a/vp9/decoder/vp9_detokenize.c
+++ b/vp9/decoder/vp9_detokenize.c
@@ -389,18 +389,31 @@ static INLINE int decode_sb(VP9D_COMP* const pbi,
    const int seg_eob = get_eob(xd, segment_id, eob_max);
    int i, eobtotal = 0;
  
+  assert(count == offset * 3 / 2);
+
    // luma blocks
    for (i = 0; i < offset; i += inc) {
      const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob,
-                               xd->qcoeff + i * 16, tx_size);
+                               BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
+                               tx_size);
      xd->eobs[i] = c;
      eobtotal += c;
    }
  
    // chroma blocks
-  for (i = offset; i < count; i += inc) {
+  for (i = offset; i < offset * 5 / 4; i += inc) {
+    const int b = i - offset;
+    const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+                               BLOCK_OFFSET(xd->plane[1].qcoeff, b, 16),
+                               tx_size);
+    xd->eobs[i] = c;
+    eobtotal += c;
+  }
+  for (i = offset * 5 / 4; i < count; i += inc) {
+    const int b = i - offset * 5 / 4;
      const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
-                               xd->qcoeff + i * 16, tx_size);
+                               BLOCK_OFFSET(xd->plane[2].qcoeff, b, 16),
+                               tx_size);
      xd->eobs[i] = c;
      eobtotal += c;
    }
@@ -415,20 +428,24 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi,
      case TX_32X32: {
        // 32x32 luma block
        const int segment_id = xd->mode_info_context->mbmi.segment_id;
-      int i, eobtotal = 0, seg_eob;
+      int eobtotal = 0, seg_eob;
        int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
-                       get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32);
+                           get_eob(xd, segment_id, 1024),
+                           xd->plane[0].qcoeff, TX_32X32);
        xd->eobs[0] = c;
        eobtotal += c;
  
        // 16x16 chroma blocks
        seg_eob = get_eob(xd, segment_id, 256);
-      for (i = 64; i < 96; i += 16) {
-        c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
-                         xd->qcoeff + i * 16, TX_16X16);
-        xd->eobs[i] = c;
-        eobtotal += c;
-      }
+
+      c = decode_coefs(pbi, xd, bc, 64, PLANE_TYPE_UV, seg_eob,
+                       xd->plane[1].qcoeff, TX_16X16);
+      xd->eobs[64] = c;
+      eobtotal += c;
+      c = decode_coefs(pbi, xd, bc, 80, PLANE_TYPE_UV, seg_eob,
+                       xd->plane[2].qcoeff, TX_16X16);
+      xd->eobs[80] = c;
+      eobtotal += c;
        return eobtotal;
      }
      case TX_16X16:
@@ -465,22 +482,26 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi,
                                        MACROBLOCKD* const xd,
                                        BOOL_DECODER* const bc) {
    const int segment_id = xd->mode_info_context->mbmi.segment_id;
-  int i, eobtotal = 0, seg_eob;
+  int eobtotal = 0, seg_eob;
  
    // Luma block
    int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC,
-                       get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16);
+                       get_eob(xd, segment_id, 256),
+                       xd->plane[0].qcoeff, TX_16X16);
    xd->eobs[0] = c;
    eobtotal += c;
  
    // 8x8 chroma blocks
    seg_eob = get_eob(xd, segment_id, 64);
-  for (i = 16; i < 24; i += 4) {
-    c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
-                     seg_eob, xd->block[i].qcoeff, TX_8X8);
-    xd->eobs[i] = c;
-    eobtotal += c;
-  }
+
+  c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV,
+                   seg_eob, xd->plane[1].qcoeff, TX_8X8);
+  xd->eobs[16] = c;
+  eobtotal += c;
+  c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV,
+                   seg_eob, xd->plane[2].qcoeff, TX_8X8);
+  xd->eobs[20] = c;
+  eobtotal += c;
    return eobtotal;
  }
  
@@ -493,8 +514,9 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
    // luma blocks
    int seg_eob = get_eob(xd, segment_id, 64);
    for (i = 0; i < 16; i += 4) {
-    const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC,
-                               seg_eob, xd->block[i].qcoeff, TX_8X8);
+    const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob,
+                               BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16),
+                               TX_8X8);
      xd->eobs[i] = c;
      eobtotal += c;
    }
@@ -504,19 +526,31 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi,
        xd->mode_info_context->mbmi.mode == SPLITMV) {
      // use 4x4 transform for U, V components in I8X8/splitmv prediction mode
      seg_eob = get_eob(xd, segment_id, 16);
-    for (i = 16; i < 24; i++) {
-      const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
-                                 seg_eob, xd->block[i].qcoeff, TX_4X4);
+    for (i = 16; i < 20; i++) {
+      const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+                                 BLOCK_OFFSET(xd->plane[1].qcoeff, i - 16, 16),
+                                 TX_4X4);
        xd->eobs[i] = c;
        eobtotal += c;
      }
-  } else {
-    for (i = 16; i < 24; i += 4) {
-      const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV,
-                                 seg_eob, xd->block[i].qcoeff, TX_8X8);
+    for (i = 20; i < 24; i++) {
+      const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob,
+                                 BLOCK_OFFSET(xd->plane[2].qcoeff, i - 20, 16),
+                                 TX_4X4);
        xd->eobs[i] = c;
        eobtotal += c;
      }
+  } else {
+    int c;
+
+    c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV, seg_eob,
+                     xd->plane[1].qcoeff, TX_8X8);
+    xd->eobs[16] = c;
+    eobtotal += c;
+    c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV, seg_eob,
+                     xd->plane[2].qcoeff, TX_8X8);
+    xd->eobs[20] = c;
+    eobtotal += c;
    }
  
    return eobtotal;
@@ -526,7 +560,7 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd,
                              BOOL_DECODER* const bc,
                              PLANE_TYPE type, int i, int seg_eob) {
    const int c = decode_coefs(dx, xd, bc, i, type, seg_eob,
-                             xd->block[i].qcoeff, TX_4X4);
+                             MB_SUBBLOCK_FIELD(xd, qcoeff, i), TX_4X4);
    xd->eobs[i] = c;
    return c;
  }
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c

index d74b619..a301a24 100644 (file)
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -32,35 +32,20 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq,
  }
  
  void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq,
-                                     uint8_t *pre, uint8_t *dstu,
-                                     uint8_t *dstv, int stride,
-                                     MACROBLOCKD *xd) {
+                                     uint8_t *pre, uint8_t *dst,
+                                     int stride, uint16_t *eobs) {
    int i, j;
  
    for (i = 0; i < 2; i++) {
      for (j = 0; j < 2; j++) {
-      vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride,
-                           xd->eobs[16 + i * 2 + j]);
-      q    += 16;
-      pre  += 4;
-      dstu += 4;
-    }
-
-    pre  += 32 - 8;
-    dstu += 4 * stride - 8;
-  }
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 2; j++) {
-      vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride,
-                           xd->eobs[20 + i * 2 + j]);
-      q    += 16;
-      pre  += 4;
-      dstv += 4;
+      vp9_dequant_idct_add(q, dq, pre, dst, 8, stride, eobs[i * 2 + j]);
+      q   += 16;
+      pre += 4;
+      dst += 4;
      }
  
      pre  += 32 - 8;
-    dstv += 4 * stride - 8;
+    dst += 4 * stride - 8;
    }
  }
  
@@ -82,19 +67,6 @@ void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq,
                               xd->eobs[12]);
  }
  
-void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq,
-                                         uint8_t *pre,
-                                         uint8_t *dstu,
-                                         uint8_t *dstv,
-                                         int stride, MACROBLOCKD *xd) {
-  vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]);
-
-  q    += 64;
-  pre  += 64;
-
-  vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]);
-}
-
  void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
                                               uint8_t *pre,
                                               uint8_t *dst,
@@ -117,36 +89,22 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq,
  
  void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq,
                                                uint8_t *pre,
-                                              uint8_t *dstu,
-                                              uint8_t *dstv,
+                                              uint8_t *dst,
                                                int stride,
-                                              MACROBLOCKD *xd) {
+                                              uint16_t *eobs) {
    int i, j;
  
    for (i = 0; i < 2; i++) {
      for (j = 0; j < 2; j++) {
-      vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride,
-                                      xd->eobs[16 + i * 2 + j]);
-      q    += 16;
-      pre  += 4;
-      dstu += 4;
-    }
-
-    pre  += 32 - 8;
-    dstu += 4 * stride - 8;
-  }
-
-  for (i = 0; i < 2; i++) {
-    for (j = 0; j < 2; j++) {
-      vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride,
-                                      xd->eobs[20 + i * 2 + j]);
-      q    += 16;
-      pre  += 4;
-      dstv += 4;
+      vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 8, stride,
+                                      eobs[i * 2 + j]);
+      q   += 16;
+      pre += 4;
+      dst += 4;
      }
  
-    pre  += 32 - 8;
-    dstv += 4 * stride - 8;
+    pre += 32 - 8;
+    dst += 4 * stride - 8;
    }
  }
  
diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c

index e174a89..1a770dc 100644 (file)
--- a/vp9/encoder/vp9_asm_enc_offsets.c
+++ b/vp9/encoder/vp9_asm_enc_offsets.c
@@ -29,9 +29,7 @@ DEFINE(vp9_block_zbin_extra,                    offsetof(BLOCK, zbin_extra));
  DEFINE(vp9_block_zrun_zbin_boost,               offsetof(BLOCK, zrun_zbin_boost));
  DEFINE(vp9_block_quant_shift,                   offsetof(BLOCK, quant_shift));
  
-DEFINE(vp9_blockd_qcoeff,                       offsetof(BLOCKD, qcoeff));
  DEFINE(vp9_blockd_dequant,                      offsetof(BLOCKD, dequant));
-DEFINE(vp9_blockd_dqcoeff,                      offsetof(BLOCKD, dqcoeff));
  
  END
  
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c

index eddacb8..883038b 100644 (file)
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -16,6 +16,8 @@
  #include "vp9/common/vp9_invtrans.h"
  #include "vp9/encoder/vp9_encodeintra.h"
  
+static void encode_intra4x4block(MACROBLOCK *x, int ib);
+
  int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
    MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi;
    (void) cpi;
@@ -31,18 +33,21 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) {
  
      for (i = 0; i < 16; i++) {
        x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED;
-      vp9_encode_intra4x4block(x, i);
+      encode_intra4x4block(x, i);
      }
    }
  
    return vp9_get_mb_ss(x->src_diff);
  }
  
-void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
+static void encode_intra4x4block(MACROBLOCK *x, int ib) {
    BLOCKD *b = &x->e_mbd.block[ib];
    BLOCK *be = &x->block[ib];
+  MACROBLOCKD * const xd = &x->e_mbd;
    TX_TYPE tx_type;
  
+  assert(ib < 16);
+
  #if CONFIG_NEWBINTRAMODES
    b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b);
  #endif
@@ -54,12 +59,14 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
    if (tx_type != DCT_DCT) {
      vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
      vp9_ht_quantize_b_4x4(x, ib, tx_type);
-    vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
+    vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
+                     b->diff, 16, tx_type);
    } else {
      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
      x->quantize_b_4x4(x, ib);
      vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
-                                b->dqcoeff, b->diff, 32);
+                                BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
+                                b->diff, 32);
    }
  
    vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -69,7 +76,7 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) {
    int i;
  
    for (i = 0; i < 16; i++)
-    vp9_encode_intra4x4block(mb, i);
+    encode_intra4x4block(mb, i);
  }
  
  void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) {
@@ -151,41 +158,47 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
  
    if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) {
      int idx = (ib & 0x02) ? (ib + 2) : ib;
+    int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
  
+    assert(idx < 16);
      tx_type = get_tx_type_8x8(xd, ib);
      if (tx_type != DCT_DCT) {
        vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type);
        x->quantize_b_8x8(x, idx, tx_type);
-      vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff,
+      vp9_short_iht8x8(dqcoeff, xd->block[ib].diff,
                              16, tx_type);
      } else {
        x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
        x->quantize_b_8x8(x, idx, DCT_DCT);
-      vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+      vp9_short_idct8x8(dqcoeff, xd->block[ib].diff, 32);
      }
    } else {
      for (i = 0; i < 4; i++) {
+      int idx = ib + iblock[i];
+      int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16);
+
+      assert(idx < 16);
        b = &xd->block[ib + iblock[i]];
        be = &x->block[ib + iblock[i]];
        tx_type = get_tx_type_4x4(xd, ib + iblock[i]);
        if (tx_type != DCT_DCT) {
          vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type);
          vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type);
-        vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type);
+        vp9_short_iht4x4(dqcoeff, b->diff, 16, tx_type);
        } else if (!(i & 1) &&
                   get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) {
          x->fwd_txm8x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1);
          vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
-                                    b->dqcoeff, b->diff, 32);
+                                    dqcoeff, b->diff, 32);
          vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1],
-                                    (b + 1)->dqcoeff, (b + 1)->diff, 32);
+                                    dqcoeff + 16, (b + 1)->diff, 32);
          i++;
        } else {
          x->fwd_txm4x4(be->src_diff, be->coeff, 32);
          x->quantize_b_4x4(x, ib + iblock[i]);
          vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]],
-                                    b->dqcoeff, b->diff, 32);
+                                    dqcoeff, b->diff, 32);
        }
      }
    }
@@ -206,9 +219,12 @@ void vp9_encode_intra8x8mby(MACROBLOCK *x) {
  }
  
  static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
+  MACROBLOCKD * const xd = &x->e_mbd;
    BLOCKD *b = &x->e_mbd.block[ib];
    BLOCK *be = &x->block[ib];
+  int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib);
  
+  assert(ib >= 16 && ib < 24);
    vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor);
  
    vp9_subtract_b(be, b, 8);
@@ -216,7 +232,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) {
    x->fwd_txm4x4(be->src_diff, be->coeff, 16);
    x->quantize_b_4x4(x, ib);
    vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib],
-                              b->dqcoeff, b->diff, 16);
+                              dqcoeff, b->diff, 16);
  
    vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst,
                     b->dst_stride);
diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h

index 0b19b56..6576c94 100644 (file)
--- a/vp9/encoder/vp9_encodeintra.h
+++ b/vp9/encoder/vp9_encodeintra.h
@@ -17,7 +17,6 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred);
  void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x);
  void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x);
  void vp9_encode_intra4x4mby(MACROBLOCK *mb);
-void vp9_encode_intra4x4block(MACROBLOCK *x, int ib);
  void vp9_encode_intra8x8mby(MACROBLOCK *x);
  void vp9_encode_intra8x8mbuv(MACROBLOCK *x);
  void vp9_encode_intra8x8(MACROBLOCK *x, int ib);
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c

index 2701577..a302688 100644 (file)
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -549,9 +549,10 @@ static void optimize_b(VP9_COMMON *const cm,
    MACROBLOCKD *const xd = &mb->e_mbd;
    vp9_token_state tokens[1025][2];
    unsigned best_index[1025][2];
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, ib);
    const int16_t *coeff_ptr = mb->coeff + ib * 16;
-  int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
-  int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16;
+  int16_t *qcoeff_ptr;
+  int16_t *dqcoeff_ptr;
    int eob = xd->eobs[ib], final_eob, sz = 0;
    const int i0 = 0;
    int rc, x, next, i;
@@ -582,6 +583,8 @@ static void optimize_b(VP9_COMMON *const cm,
    nzc0 = nzc1 = nzc;
  #endif
  
+  dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16);
+  qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16);
    switch (tx_size) {
      default:
      case TX_4X4: {
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c

index 881fce5..826bee4 100644 (file)
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -39,8 +39,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
    int zbin;
    int x, y, z, sz;
    int16_t *coeff_ptr       = mb->coeff + b_idx * 16;
-  int16_t *qcoeff_ptr      = xd->qcoeff + b_idx * 16;
-  int16_t *dqcoeff_ptr     = xd->dqcoeff + b_idx * 16;
+  // ht is luma-only
+  int16_t *qcoeff_ptr      = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16);
+  int16_t *dqcoeff_ptr     = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16);
    int16_t *zbin_boost_ptr  = b->zrun_zbin_boost;
    int16_t *zbin_ptr        = b->zbin;
    int16_t *round_ptr       = b->round;
@@ -110,14 +111,17 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
  void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) {
    MACROBLOCKD *const xd = &mb->e_mbd;
    const int c_idx = plane_idx(xd, b_idx);
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx);
    BLOCK *const b = &mb->block[c_idx];
    BLOCKD *const d = &xd->block[c_idx];
    int i, rc, eob;
    int zbin;
    int x, y, z, sz;
    int16_t *coeff_ptr       = mb->coeff + b_idx * 16;
-  int16_t *qcoeff_ptr      = xd->qcoeff + b_idx * 16;
-  int16_t *dqcoeff_ptr     = xd->dqcoeff + b_idx * 16;
+  int16_t *qcoeff_ptr      = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
+                                          pb_idx.block, 16);
+  int16_t *dqcoeff_ptr     = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
+                                          pb_idx.block, 16);
    int16_t *zbin_boost_ptr  = b->zrun_zbin_boost;
    int16_t *zbin_ptr        = b->zbin;
    int16_t *round_ptr       = b->round;
@@ -186,9 +190,13 @@ void vp9_quantize_mby_4x4(MACROBLOCK *x) {
  
  void vp9_quantize_mbuv_4x4(MACROBLOCK *x) {
    int i;
+  const MACROBLOCKD * const xd = &x->e_mbd;
+  const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type;
+  xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
  
    for (i = 16; i < 24; i++)
      x->quantize_b_4x4(x, i);
+  xd->mode_info_context->mbmi.sb_type = real_sb_type;
  }
  
  void vp9_quantize_mb_4x4(MACROBLOCK *x) {
@@ -198,9 +206,12 @@ void vp9_quantize_mb_4x4(MACROBLOCK *x) {
  
  void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
    MACROBLOCKD *const xd = &mb->e_mbd;
-  int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx;
-  int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx;
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx);
    const int c_idx = plane_idx(xd, b_idx);
+  int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
+                                     pb_idx.block, 16);
+  int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff,
+                                      pb_idx.block, 16);
    BLOCK *const b = &mb->block[c_idx];
    BLOCKD *const d = &xd->block[c_idx];
    const int *pt_scan;
@@ -323,6 +334,9 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) {
  
  void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
    int i;
+  const MACROBLOCKD * const xd = &x->e_mbd;
+  const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type;
+  xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
  
  #if CONFIG_CODE_NONZEROCOUNT
    for (i = 16; i < 24; i ++) {
@@ -331,6 +345,7 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x) {
  #endif
    for (i = 16; i < 24; i += 4)
      x->quantize_b_8x8(x, i, DCT_DCT);
+  xd->mode_info_context->mbmi.sb_type = real_sb_type;
  }
  
  void vp9_quantize_mb_8x8(MACROBLOCK *x) {
@@ -418,6 +433,7 @@ static void quantize(int16_t *zbin_boost_orig_ptr,
  void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
    MACROBLOCKD *const xd = &mb->e_mbd;
    const int c_idx = plane_idx(xd, b_idx);
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx);
    BLOCK *const b = &mb->block[c_idx];
    BLOCKD *const d = &xd->block[c_idx];
    const int *pt_scan;
@@ -438,8 +454,8 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
             mb->coeff + 16 * b_idx,
             256, b->skip_block,
             b->zbin, b->round, b->quant, b->quant_shift,
-           xd->qcoeff + 16 * b_idx,
-           xd->dqcoeff + 16 * b_idx,
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
             d->dequant,
             b->zbin_extra,
             &xd->eobs[b_idx],
@@ -452,6 +468,7 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) {
  void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
    MACROBLOCKD *const xd = &mb->e_mbd;
    const int c_idx = plane_idx(xd, b_idx);
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx);
    BLOCK *const b = &mb->block[c_idx];
    BLOCKD *const d = &xd->block[c_idx];
  
@@ -460,8 +477,8 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) {
             1024, b->skip_block,
             b->zbin,
             b->round, b->quant, b->quant_shift,
-           xd->qcoeff + b_idx * 16,
-           xd->dqcoeff + b_idx * 16,
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16),
+           BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16),
             d->dequant,
             b->zbin_extra,
             &xd->eobs[b_idx],
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 34adc99..82c5b5b 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -348,35 +348,36 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) {
  }
  
  int vp9_mbblock_error_c(MACROBLOCK *mb) {
+  MACROBLOCKD * const xd = &mb->e_mbd;
    BLOCK  *be;
-  BLOCKD *bd;
-  int i, j;
-  int berror, error = 0;
+  int i;
+  int error = 0;
  
    for (i = 0; i < 16; i++) {
      be = &mb->block[i];
-    bd = &mb->e_mbd.block[i];
-    berror = 0;
-    for (j = 0; j < 16; j++) {
-      int this_diff = be->coeff[j] - bd->dqcoeff[j];
-      berror += this_diff * this_diff;
-    }
-    error += berror;
+    error += vp9_block_error(be->coeff,
+                             BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
    }
    return error;
  }
  
  int vp9_mbuverror_c(MACROBLOCK *mb) {
+  MACROBLOCKD * const xd = &mb->e_mbd;
    BLOCK  *be;
-  BLOCKD *bd;
  
    int i, error = 0;
  
-  for (i = 16; i < 24; i++) {
+  for (i = 16; i < 20; i++) {
      be = &mb->block[i];
-    bd = &mb->e_mbd.block[i];
-
-    error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16);
+    error += vp9_block_error(be->coeff,
+                             BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16),
+                             16);
+  }
+  for (i = 20; i < 24; i++) {
+    be = &mb->block[i];
+    error += vp9_block_error(be->coeff,
+                             BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16),
+                             16);
    }
  
    return error;
@@ -438,7 +439,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb,
    int c = 0;
    int cost = 0, pad;
    const int *scan, *nb;
-  const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16;
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, ib);
+  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
+                                           pb_idx.block, 16);
    const int ref = mbmi->ref_frame != INTRA_FRAME;
    unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] =
        mb->token_costs[tx_size][type][ref];
@@ -858,6 +861,26 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff,
    return error > INT_MAX ? INT_MAX : (int)error;
  }
  
+static int vp9_sb_uv_block_error_c(int16_t *coeff,
+                                   int16_t *dqcoeff0, int16_t *dqcoeff1,
+                                   int block_size, int shift) {
+  int i;
+  int64_t error = 0;
+
+  for (i = 0; i < block_size / 2; i++) {
+    unsigned int this_diff = coeff[i] - dqcoeff0[i];
+    error += this_diff * this_diff;
+  }
+  coeff += block_size / 2;
+  for (i = 0; i < block_size / 2; i++) {
+    unsigned int this_diff = coeff[i] - dqcoeff1[i];
+    error += this_diff * this_diff;
+  }
+  error >>= shift;
+
+  return error > INT_MAX ? INT_MAX : (int)error;
+}
+
  static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) {
    int cost = 0, b;
    MACROBLOCKD *const xd = &x->e_mbd;
@@ -884,7 +907,7 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sby_4x4(x);
    vp9_quantize_sby_4x4(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2);
    *rate       = rdcost_sby_4x4(cm, x);
    *skippable  = vp9_sby_is_skippable_4x4(xd);
  }
@@ -915,7 +938,7 @@ static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sby_8x8(x);
    vp9_quantize_sby_8x8(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2);
    *rate       = rdcost_sby_8x8(cm, x);
    *skippable  = vp9_sby_is_skippable_8x8(xd);
  }
@@ -946,7 +969,7 @@ static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sby_16x16(x);
    vp9_quantize_sby_16x16(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2);
    *rate       = rdcost_sby_16x16(cm, x);
    *skippable  = vp9_sby_is_skippable_16x16(xd);
  }
@@ -971,7 +994,7 @@ static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sby_32x32(x);
    vp9_quantize_sby_32x32(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 0);
    *rate       = rdcost_sby_32x32(cm, x);
    *skippable  = vp9_sby_is_skippable_32x32(xd);
  }
@@ -1022,7 +1045,7 @@ static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sb64y_4x4(x);
    vp9_quantize_sb64y_4x4(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2);
    *rate       = rdcost_sb64y_4x4(cm, x);
    *skippable  = vp9_sb64y_is_skippable_4x4(xd);
  }
@@ -1053,7 +1076,7 @@ static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sb64y_8x8(x);
    vp9_quantize_sb64y_8x8(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2);
    *rate       = rdcost_sb64y_8x8(cm, x);
    *skippable  = vp9_sb64y_is_skippable_8x8(xd);
  }
@@ -1085,7 +1108,7 @@ static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sb64y_16x16(x);
    vp9_quantize_sb64y_16x16(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2);
    *rate       = rdcost_sb64y_16x16(cm, x);
    *skippable  = vp9_sb64y_is_skippable_16x16(xd);
  }
@@ -1117,7 +1140,7 @@ static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_transform_sb64y_32x32(x);
    vp9_quantize_sb64y_32x32(x);
  
-  *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0);
+  *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 0);
    *rate       = rdcost_sb64y_32x32(cm, x);
    *skippable  = vp9_sb64y_is_skippable_32x32(xd);
  }
@@ -1163,8 +1186,8 @@ static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) {
    d[29] = p[29];
  }
  
-static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
-                                     BLOCKD *b, B_PREDICTION_MODE *best_mode,
+static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
+                                     B_PREDICTION_MODE *best_mode,
                                       int *bmode_costs,
                                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                                       int *bestrate, int *bestratey,
@@ -1175,6 +1198,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
    int rate = 0;
    int distortion;
    VP9_COMMON *const cm = &cpi->common;
+  BLOCK *be = x->block + ib;
+  BLOCKD *b = xd->block + ib;
  
    ENTROPY_CONTEXT ta = *a, tempa = *a;
    ENTROPY_CONTEXT tl = *l, templ = *l;
@@ -1188,6 +1213,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
    DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4);
    DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16);
  
+  assert(ib < 16);
  #if CONFIG_NEWBINTRAMODES
    b->bmi.as_mode.context = vp9_find_bpred_context(xd, b);
  #endif
@@ -1233,7 +1259,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
      ratey = cost_coeffs(cm, x, b - xd->block,
                          PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4);
      rate += ratey;
-    distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2;
+    distortion = vp9_block_error(be->coeff,
+                                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16),
+                                 16) >> 2;
  
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);
  
@@ -1247,7 +1275,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
        *a = tempa;
        *l = templ;
        copy_predictor(best_predictor, b->predictor);
-      vpx_memcpy(best_dqcoeff, b->dqcoeff, 32);
+      vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32);
      }
    }
    b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode);
@@ -1304,7 +1332,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb,
  #endif
  
      total_rd += rd_pick_intra4x4block(
-                  cpi, mb, mb->block + i, xd->block + i, &best_mode,
+                  cpi, mb, i, &best_mode,
                    bmode_costs, ta + vp9_block2above[TX_4X4][i],
                    tl + vp9_block2left[TX_4X4][i], &r, &ry, &d);
  
@@ -1504,6 +1532,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
    // note the input and output index mapping
    int idx = (ib & 0x02) ? (ib + 2) : ib;
  
+  assert(ib < 16);
    for (mode = DC_PRED; mode <= TM_PRED; mode++) {
      int64_t this_rd;
      int rate_t = 0;
@@ -1526,7 +1555,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
  
        // compute quantization mse of 8x8 block
        distortion = vp9_block_error_c((x->block + idx)->coeff,
-                                     (xd->block + idx)->dqcoeff, 64);
+          BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
  
        vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES));
        vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -1569,7 +1598,9 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
            x->fwd_txm4x4(be->src_diff, be->coeff, 32);
            x->quantize_b_4x4(x, ib + iblock[i]);
          }
-        distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
+        distortion += vp9_block_error_c(be->coeff,
+            BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16),
+            16 << do_two);
          rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC,
                                i&1 ? ta1 : ta0, i&2 ? tl1 : tl0,
                                TX_4X4);
@@ -1598,8 +1629,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
        best_rd = this_rd;
        *best_mode = mode;
        copy_predictor_8x8(best_predictor, b->predictor);
-      vpx_memcpy(best_dqcoeff, b->dqcoeff, 64);
-      vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64);
+      vpx_memcpy(best_dqcoeff,
+                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 64);
+      vpx_memcpy(best_dqcoeff + 32,
+                 BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16) + 64, 64);
      }
    }
    b->bmi.as_mode.first = (*best_mode);
@@ -1742,6 +1775,8 @@ static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
    MACROBLOCKD *xd = &mb->e_mbd;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta, *tl;
+  const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type;
+  xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
  
    if (backup) {
      vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -1760,6 +1795,7 @@ static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
                          tl + vp9_block2left[TX_4X4][b],
                          TX_4X4);
  
+  xd->mode_info_context->mbmi.sb_type = real_sb_type;
    return cost;
  }
  
@@ -1783,6 +1819,8 @@ static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
    MACROBLOCKD *xd = &mb->e_mbd;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta, *tl;
+  const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type;
+  xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16;
  
    if (backup) {
      vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES));
@@ -1800,6 +1838,7 @@ static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) {
                          ta + vp9_block2above[TX_8X8][b],
                          tl + vp9_block2left[TX_8X8][b], TX_8X8);
  
+  xd->mode_info_context->mbmi.sb_type = real_sb_type;
    return cost;
  }
  
@@ -1851,8 +1890,9 @@ static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_quantize_sbuv_16x16(x);
  
    *rate       = rd_cost_sbuv_16x16(cm, x, backup);
-  *distortion = vp9_sb_block_error_c(x->coeff + 1024,
-                                     xd->dqcoeff + 1024, 512, 2);
+  *distortion = vp9_sb_uv_block_error_c(x->coeff + 1024,
+                                        xd->plane[1].dqcoeff,
+                                        xd->plane[2].dqcoeff, 512, 2);
    *skip       = vp9_sbuv_is_skippable_16x16(xd);
  }
  
@@ -2127,8 +2167,9 @@ static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x,
    vp9_quantize_sb64uv_32x32(x);
  
    *rate       = rd_cost_sb64uv_32x32(cm, x, backup);
-  *distortion = vp9_sb_block_error_c(x->coeff + 4096,
-                                     xd->dqcoeff + 4096, 2048, 0);
+  *distortion = vp9_sb_uv_block_error_c(x->coeff + 4096,
+                                        xd->plane[1].dqcoeff,
+                                        xd->plane[2].dqcoeff, 2048, 0);
    *skip       = vp9_sb64uv_is_skippable_32x32(xd);
  }
  
@@ -2466,7 +2507,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm,
        vp9_subtract_b(be, bd, 16);
        x->fwd_txm4x4(be->src_diff, be->coeff, 32);
        x->quantize_b_4x4(x, i);
-      thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
+      thisdistortion = vp9_block_error(be->coeff,
+          BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16);
        *distortion += thisdistortion;
        *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC,
                                   ta + vp9_block2above[TX_4X4][i],
@@ -2508,11 +2550,12 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
        const int use_second_ref =
            xd->mode_info_context->mbmi.second_ref_frame > 0;
        int which_mv;
-      int idx = (ib & 8) + ((ib & 2) << 1);
-      BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx];
+      const int idx = (ib & 8) + ((ib & 2) << 1);
+      BLOCKD *bd = &xd->block[ib];
        BLOCK *be = &x->block[ib], *be2 = &x->block[idx];
        int thisdistortion;
  
+      assert(idx < 16);
        for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) {
          uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre;
  
@@ -2532,7 +2575,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
          if (otherrd) {
            x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
            x->quantize_b_8x8(x, idx, DCT_DCT);
-          thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
+          thisdistortion = vp9_block_error_c(be2->coeff,
+              BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
            otherdist += thisdistortion;
            xd->mode_info_context->mbmi.txfm_size = TX_8X8;
            othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
@@ -2546,7 +2590,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
            be = &x->block[ib + iblock[j]];
            x->fwd_txm8x4(be->src_diff, be->coeff, 32);
            x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
-          thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
+          thisdistortion = vp9_block_error_c(be->coeff,
+              BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
            *distortion += thisdistortion;
            *labelyrate +=
                cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC,
@@ -2563,11 +2608,11 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
        } else /* 8x8 */ {
          if (otherrd) {
            for (j = 0; j < 4; j += 2) {
-            BLOCKD *bd = &xd->block[ib + iblock[j]];
              BLOCK *be = &x->block[ib + iblock[j]];
              x->fwd_txm8x4(be->src_diff, be->coeff, 32);
              x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1);
-            thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
+            thisdistortion = vp9_block_error_c(be->coeff,
+                BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32);
              otherdist += thisdistortion;
              xd->mode_info_context->mbmi.txfm_size = TX_4X4;
              othercost +=
@@ -2586,7 +2631,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm,
          }
          x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
          x->quantize_b_8x8(x, idx, DCT_DCT);
-        thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
+        thisdistortion = vp9_block_error_c(be2->coeff,
+            BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64);
          *distortion += thisdistortion;
          *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC,
                                     ta + vp9_block2above[TX_8X8][idx],
diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c
index 8f9e9da..ab286fd 100644
--- a/vp9/encoder/vp9_tokenize.c
+++ b/vp9/encoder/vp9_tokenize.c
@@ -123,7 +123,9 @@ static void tokenize_b(VP9_COMP *cpi,
    int c = 0;
    const int eob = xd->eobs[ib];     /* one beyond last nonzero coeff */
    TOKENEXTRA *t = *tp;        /* store tokens starting here */
-  int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib;
+  const struct plane_block_idx pb_idx = plane_block_idx(xd, ib);
+  const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff,
+                                           pb_idx.block, 16);
    int seg_eob, default_eob, pad;
    const int segment_id = mbmi->segment_id;
    const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type;
diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm
index 90c793d..51314a7 100644
--- a/vp9/encoder/x86/vp9_encodeopt.asm
+++ b/vp9/encoder/x86/vp9_encodeopt.asm
@@ -260,117 +260,3 @@ sym(vp9_mbblock_error_xmm_impl):
      UNSHADOW_ARGS
      pop         rbp
      ret
-
-
-;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-global sym(vp9_mbuverror_mmx_impl) PRIVATE
-sym(vp9_mbuverror_mmx_impl):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 2
-    push rsi
-    push rdi
-    ; end prolog
-
-
-        mov             rsi,        arg(0) ;s_ptr
-        mov             rdi,        arg(1) ;d_ptr
-
-        mov             rcx,        16
-        pxor            mm7,        mm7
-
-.mbuverror_loop_mmx:
-
-        movq            mm1,        [rsi]
-        movq            mm2,        [rdi]
-
-        psubw           mm1,        mm2
-        pmaddwd         mm1,        mm1
-
-
-        movq            mm3,        [rsi+8]
-        movq            mm4,        [rdi+8]
-
-        psubw           mm3,        mm4
-        pmaddwd         mm3,        mm3
-
-
-        paddd           mm7,        mm1
-        paddd           mm7,        mm3
-
-
-        add             rsi,        16
-        add             rdi,        16
-
-        dec             rcx
-        jnz             .mbuverror_loop_mmx
-
-        movq            mm0,        mm7
-        psrlq           mm7,        32
-
-        paddd           mm0,        mm7
-        movq            rax,        mm0
-
-    pop rdi
-    pop rsi
-    ; begin epilog
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-global sym(vp9_mbuverror_xmm_impl) PRIVATE
-sym(vp9_mbuverror_xmm_impl):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 2
-    push rsi
-    push rdi
-    ; end prolog
-
-
-        mov             rsi,        arg(0) ;s_ptr
-        mov             rdi,        arg(1) ;d_ptr
-
-        mov             rcx,        16
-        pxor            xmm3,       xmm3
-
-.mbuverror_loop:
-
-        movdqa          xmm1,       [rsi]
-        movdqa          xmm2,       [rdi]
-
-        psubw           xmm1,       xmm2
-        pmaddwd         xmm1,       xmm1
-
-        paddd           xmm3,       xmm1
-
-        add             rsi,        16
-        add             rdi,        16
-
-        dec             rcx
-        jnz             .mbuverror_loop
-
-        pxor        xmm0,           xmm0
-        movdqa      xmm1,           xmm3
-
-        movdqa      xmm2,           xmm1
-        punpckldq   xmm1,           xmm0
-
-        punpckhdq   xmm2,           xmm0
-        paddd       xmm1,           xmm2
-
-        movdqa      xmm2,           xmm1
-
-        psrldq      xmm1,           8
-        paddd       xmm1,           xmm2
-
-        movq            rax,            xmm1
-
-    pop rdi
-    pop rsi
-    ; begin epilog
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c
index 2bf32c5..9557af1 100644
--- a/vp9/encoder/x86/vp9_x86_csystemdependent.c
+++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c
@@ -26,17 +26,10 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) {
  int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr);
  int vp9_mbblock_error_mmx(MACROBLOCK *mb) {
    short *coeff_ptr =  mb->block[0].coeff;
-  short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
+  short *dcoef_ptr =  mb->e_mbd.plane[0].dqcoeff;
    return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr);
  }
  
-int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr);
-int vp9_mbuverror_mmx(MACROBLOCK *mb) {
-  short *s_ptr = &mb->coeff[256];
-  short *d_ptr = &mb->e_mbd.dqcoeff[256];
-  return vp9_mbuverror_mmx_impl(s_ptr, d_ptr);
-}
-
  void vp9_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
                               short *diff, unsigned char *predictor,
                               int pitch);
@@ -54,17 +47,10 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) {
  int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr);
  int vp9_mbblock_error_xmm(MACROBLOCK *mb) {
    short *coeff_ptr =  mb->block[0].coeff;
-  short *dcoef_ptr =  mb->e_mbd.block[0].dqcoeff;
+  short *dcoef_ptr =  mb->e_mbd.plane[0].dqcoeff;
    return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr);
  }
  
-int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr);
-int vp9_mbuverror_xmm(MACROBLOCK *mb) {
-  short *s_ptr = &mb->coeff[256];
-  short *d_ptr = &mb->e_mbd.dqcoeff[256];
-  return vp9_mbuverror_xmm_impl(s_ptr, d_ptr);
-}
-
  void vp9_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
                                short *diff, unsigned char *predictor,
                                int pitch);
author	John Koleszar <jkoleszar@google.com>
	Tue, 2 Apr 2013 21:50:40 +0000 (14:50 -0700)
committer	John Koleszar <jkoleszar@google.com>
	Thu, 4 Apr 2013 23:30:57 +0000 (16:30 -0700)
vp9/common/vp9_blockd.h		patch \| blob \| history
vp9/common/vp9_invtrans.c		patch \| blob \| history
vp9/common/vp9_mbpitch.c		patch \| blob \| history
vp9/common/vp9_rtcd_defs.sh		patch \| blob \| history
vp9/decoder/vp9_decodframe.c		patch \| blob \| history
vp9/decoder/vp9_dequantize.c		patch \| blob \| history
vp9/decoder/vp9_dequantize.h		patch \| blob \| history
vp9/decoder/vp9_detokenize.c		patch \| blob \| history
vp9/decoder/vp9_idct_blk.c		patch \| blob \| history
vp9/encoder/vp9_asm_enc_offsets.c		patch \| blob \| history
vp9/encoder/vp9_encodeintra.c		patch \| blob \| history
vp9/encoder/vp9_encodeintra.h		patch \| blob \| history
vp9/encoder/vp9_encodemb.c		patch \| blob \| history
vp9/encoder/vp9_quantize.c		patch \| blob \| history
vp9/encoder/vp9_rdopt.c		patch \| blob \| history
vp9/encoder/vp9_tokenize.c		patch \| blob \| history
vp9/encoder/x86/vp9_encodeopt.asm		patch \| blob \| history
vp9/encoder/x86/vp9_x86_csystemdependent.c		patch \| blob \| history