fix the lossless experiment
author Yaowu Xu <yaowu@google.com>
Mon, 11 Feb 2013 23:58:22 +0000 (15:58 -0800)
committer Yaowu Xu <yaowu@google.com>
Wed, 13 Feb 2013 17:20:26 +0000 (09:20 -0800)
Fixes the CONFIG_LOSSLESS experiment:

- Move the dequant/IDCT-add function pointers (idct_add, dc_idct_add and
  the y/uv block variants) from the decoder context (VP9D_COMP) into
  MACROBLOCKD, and add a dc_only_idct_add hook, so common and decoder
  code dispatch the lossless or lossy variants through xd.
- Add a lossless flag to MACROBLOCKD, set it in mb_init_dequantizer()
  and vp9_encode_frame(), and use it to disable ADST selection in
  get_tx_type_4x4() when coding losslessly.
- Add the missing specialize lines for the lossless inverse Walsh
  prototypes in vp9_rtcd_defs.sh.
- Run the second-order inverse transform through
  xd->inv_walsh4x4_lossless instead of hardcoding
  vp9_short_inv_walsh4x4.
- Call vp9_optimize_mby_16x16() in macro_block_yrd_16x16() only when
  trellis optimization is enabled (mb->optimize).

Change-Id: I95acfc1417634b52d344586ab97f0abaa9a4b256
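
For readers skimming the diff, here is a minimal sketch of the dispatch
pattern the patch moves to: the transform/add hooks live in MACROBLOCKD,
so whoever initializes xd chooses between the Walsh-based lossless
routines and the regular IDCT ones, and downstream callers simply go
through the pointers. The helper name init_inv_xform_ptrs and its
reduced argument list are illustrative only; the assigned function names
are the ones appearing in this patch.

/* Illustrative only -- not part of the patch.  Assumes the usual vp9
 * headers (vp9_blockd.h, decoder/vp9_dequantize.h, the RTCD header)
 * are in scope.  Mirrors what mb_init_dequantizer() does after this
 * change, for a subset of the hooks. */
static void init_inv_xform_ptrs(MACROBLOCKD *xd, int q_index) {
#if CONFIG_LOSSLESS
  xd->lossless = (q_index == 0);
  if (xd->lossless) {
    /* Walsh-based, reversible paths used for lossless coding. */
    xd->inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
    xd->idct_add              = vp9_dequant_idct_add_lossless_c;
    xd->dc_only_idct_add      = vp9_dc_only_inv_walsh_add_c;
    return;
  }
#endif
  /* Regular lossy 4x4 inverse transform paths. */
  xd->inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
  xd->idct_add              = vp9_dequant_idct_add;
  xd->dc_only_idct_add      = vp9_dc_only_idct_add_c;
}

In this patch the decoder's mb_init_dequantizer() does this wiring for
the full set of hooks, keyed on QIndex == 0, while vp9_encode_frame()
sets xd->lossless from cpi->oxcf.lossless on the encoder side.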

vp9/common/vp9_blockd.h
vp9/common/vp9_invtrans.c
vp9/common/vp9_rtcd_defs.sh
vp9/decoder/vp9_decodframe.c
vp9/decoder/vp9_dequantize.h
vp9/decoder/vp9_idct_blk.c
vp9/decoder/vp9_onyxd_int.h
vp9/encoder/vp9_encodeframe.c
vp9/encoder/vp9_rdopt.c

vp9/common/vp9_blockd.h
index 0d51f06..b0c1bfa 100644 (file)
@@ -386,11 +386,28 @@ typedef struct macroblockd {
   unsigned int frames_since_golden;
   unsigned int frames_till_alt_ref_frame;
 
+#if CONFIG_LOSSLESS
+  int lossless;
+#endif
   /* Inverse transform function pointers. */
   void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);
   void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);
   void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);
   void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);
+  void (*idct_add)(int16_t *input, const int16_t *dq,
+    uint8_t *pred, uint8_t *output, int pitch, int stride);
+  void (*dc_idct_add)(int16_t *input, const int16_t *dq,
+    uint8_t *pred, uint8_t *output, int pitch, int stride, int dc);
+  void (*dc_only_idct_add)(int input_dc, uint8_t *pred_ptr,
+    uint8_t *dst_ptr, int pitch, int stride);
+  void (*dc_idct_add_y_block)(int16_t *q, const int16_t *dq,
+    uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs,
+    const int16_t *dc);
+  void (*idct_add_y_block)(int16_t *q, const int16_t *dq,
+    uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs);
+  void (*idct_add_uv_block)(int16_t *q, const int16_t *dq,
+    uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
+    uint16_t *eobs);
 
   struct subpix_fn_table  subpix;
 
@@ -501,6 +518,10 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
   int ib = (int)(b - xd->block);
   if (ib >= 16)
     return tx_type;
+#if CONFIG_LOSSLESS
+  if (xd->lossless)
+    return tx_type;
+#endif
   // TODO(rbultje, debargha): Explore ADST usage for superblocks
   if (xd->mode_info_context->mbmi.sb_type)
     return tx_type;

vp9/common/vp9_invtrans.c
index c81fe2d..241a5bc 100644 (file)
@@ -44,7 +44,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
 
   if (has_2nd_order) {
     /* do 2nd order transform on the dc block */
-    vp9_short_inv_walsh4x4(blockd[24].dqcoeff, blockd[24].diff);
+    xd->inv_walsh4x4_lossless(blockd[24].dqcoeff, blockd[24].diff);
     recon_dcblock(xd);
   }
 
vp9/common/vp9_rtcd_defs.sh
index 4dce0c9..02f8b66 100644 (file)
@@ -324,10 +324,15 @@ specialize vp9_dc_only_idct_add
 
 if [ "$CONFIG_LOSSLESS" = "yes" ]; then
 prototype void vp9_short_inv_walsh4x4_1_x8 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_inv_walsh4x4_1_x8
 prototype void vp9_short_inv_walsh4x4_x8 "int16_t *input, int16_t *output, int pitch"
+specialize vp9_short_inv_walsh4x4_x8
 prototype void vp9_dc_only_inv_walsh_add "int input_dc, uint8_t *pred_ptr, uint8_t *dst_ptr, int pitch, int stride"
+specialize vp9_dc_only_inv_walsh_add
 prototype void vp9_short_inv_walsh4x4_1_lossless "int16_t *in, int16_t *out"
+specialize vp9_short_inv_walsh4x4_1_lossless
 prototype void vp9_short_inv_walsh4x4_lossless "int16_t *in, int16_t *out"
+specialize vp9_short_inv_walsh4x4_lossless
 fi
 
 prototype unsigned int vp9_sad32x3 "const uint8_t *src_ptr, int  src_stride, const uint8_t *ref_ptr, int ref_stride, int max_sad"

vp9/decoder/vp9_decodframe.c
index 3324186..f103937 100644 (file)
@@ -124,37 +124,42 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
   }
 
 #if CONFIG_LOSSLESS
+  pbi->mb.lossless = 0;
   if (!QIndex) {
     pbi->mb.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
     pbi->mb.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
     pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
     pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
-    pbi->idct_add            = vp9_dequant_idct_add_lossless_c;
-    pbi->dc_idct_add         = vp9_dequant_dc_idct_add_lossless_c;
-    pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
-    pbi->idct_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
-    pbi->idct_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
+    pbi->mb.idct_add            = vp9_dequant_idct_add_lossless_c;
+    pbi->mb.dc_only_idct_add    = vp9_dc_only_inv_walsh_add_c;
+    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add_lossless_c;
+    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
+    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
+    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
+    pbi->mb.lossless = 1;
   } else {
     pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
     pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
     pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
     pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-    pbi->idct_add            = vp9_dequant_idct_add;
-    pbi->dc_idct_add         = vp9_dequant_dc_idct_add;
-    pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
-    pbi->idct_add_y_block    = vp9_dequant_idct_add_y_block;
-    pbi->idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
+    pbi->mb.idct_add            = vp9_dequant_idct_add;
+    pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;
+    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;
+    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
+    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;
+    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
   }
 #else
   pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
   pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
   pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
   pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-  pbi->idct_add            = vp9_dequant_idct_add;
-  pbi->dc_idct_add         = vp9_dequant_dc_idct_add;
-  pbi->dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
-  pbi->idct_add_y_block    = vp9_dequant_idct_add_y_block;
-  pbi->idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
+  pbi->mb.idct_add            = vp9_dequant_idct_add;
+  pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;
+  pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;
+  pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
+  pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;
+  pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
 #endif
 
   for (i = 16; i < 24; i++) {
@@ -344,15 +349,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
       int i8x8mode = b->bmi.as_mode.first;
       b = &xd->block[16 + i];
       vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor);
-      pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
       b = &xd->block[20 + i];
       vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor);
-      pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
     }
   } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
-    pbi->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+    xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
          xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
          xd->dst.uv_stride, xd->eobs + 16);
   } else {
@@ -399,17 +404,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                     *(b->base_dst) + b->dst, 16,
                                     b->dst_stride, b->eob);
         } else {
-          vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
-                               *(b->base_dst) + b->dst, 16, b->dst_stride);
+          xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+                        *(b->base_dst) + b->dst, 16, b->dst_stride);
         }
       }
       b = &xd->block[16 + i];
       vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
       b = &xd->block[20 + i];
       vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      pbi->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
     }
   } else if (mode == B_PRED) {
@@ -433,8 +438,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                   *(b->base_dst) + b->dst, 16, b->dst_stride,
                                   b->eob);
       } else {
-        vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
-                             *(b->base_dst) + b->dst, 16, b->dst_stride);
+        xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+                      *(b->base_dst) + b->dst, 16, b->dst_stride);
       }
     }
     if (!xd->mode_info_context->mbmi.mb_skip_coeff) {
@@ -443,7 +448,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
     xd->above_context->y2 = 0;
     xd->left_context->y2 = 0;
     vp9_build_intra_predictors_mbuv(xd);
-    pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
                            xd->block[16].dequant,
                            xd->predictor + 16 * 16,
                            xd->dst.u_buffer,
@@ -452,13 +457,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                            xd->eobs + 16);
   } else if (mode == SPLITMV) {
     assert(get_2nd_order_usage(xd) == 0);
-    pbi->idct_add_y_block(xd->qcoeff,
+    xd->idct_add_y_block(xd->qcoeff,
                           xd->block[0].dequant,
                           xd->predictor,
                           xd->dst.y_buffer,
                           xd->dst.y_stride,
                           xd->eobs);
-    pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
                            xd->block[16].dequant,
                            xd->predictor + 16 * 16,
                            xd->dst.u_buffer,
@@ -495,8 +500,8 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                     *(b->base_dst) + b->dst, 16,
                                     b->dst_stride, b->eob);
         } else {
-          vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
-                               *(b->base_dst) + b->dst, 16, b->dst_stride);
+          xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+                        *(b->base_dst) + b->dst, 16, b->dst_stride);
         }
       }
     } else {
@@ -504,7 +509,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
       assert(get_2nd_order_usage(xd) == 1);
       vp9_dequantize_b(b);
       if (xd->eobs[24] > 1) {
-        vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
+        xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
         ((int *)b->qcoeff)[1] = 0;
         ((int *)b->qcoeff)[2] = 0;
@@ -518,7 +523,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
         ((int *)b->qcoeff)[0] = 0;
       }
       vp9_dequantize_b(b);
-      pbi->dc_idct_add_y_block(xd->qcoeff,
+      xd->dc_idct_add_y_block(xd->qcoeff,
                                xd->block[0].dequant,
                                xd->predictor,
                                xd->dst.y_buffer,
@@ -526,7 +531,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                xd->eobs,
                                xd->block[24].diff);
     }
-    pbi->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
                            xd->block[16].dequant,
                            xd->predictor + 16 * 16,
                            xd->dst.u_buffer,
@@ -644,7 +649,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
             + x_idx * 16 + (i & 3) * 4,
             xd->dst.y_stride, xd->dst.y_stride, b->eob);
       } else {
-        vp9_dequant_idct_add_c(
+        xd->idct_add(
             b->qcoeff, b->dequant,
             xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
             + x_idx * 16 + (i & 3) * 4,
@@ -656,7 +661,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
   } else {
     vp9_dequantize_b(b);
     if (xd->eobs[24] > 1) {
-      vp9_short_inv_walsh4x4(&b->dqcoeff[0], b->diff);
+      xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
       ((int *)b->qcoeff)[0] = 0;
       ((int *)b->qcoeff)[1] = 0;
       ((int *)b->qcoeff)[2] = 0;

vp9/decoder/vp9_dequantize.h
index 2a0ae80..0fa5144 100644 (file)
@@ -42,20 +42,6 @@ extern void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *
                                                      uint16_t *eobs);
 #endif
 
-typedef void (*vp9_dequant_idct_add_fn_t)(int16_t *input, const int16_t *dq,
-    unsigned char *pred, unsigned char *output, int pitch, int stride);
-typedef void(*vp9_dequant_dc_idct_add_fn_t)(int16_t *input, const int16_t *dq,
-    unsigned char *pred, unsigned char *output, int pitch, int stride, int dc);
-
-typedef void(*vp9_dequant_dc_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
-    unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs,
-    const int16_t *dc);
-typedef void(*vp9_dequant_idct_add_y_block_fn_t)(int16_t *q, const int16_t *dq,
-    unsigned char *pre, unsigned char *dst, int stride, uint16_t *eobs);
-typedef void(*vp9_dequant_idct_add_uv_block_fn_t)(int16_t *q, const int16_t *dq,
-    unsigned char *pre, unsigned char *dst_u, unsigned char *dst_v, int stride,
-    uint16_t *eobs);
-
 void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq,
                                     unsigned char *pred, unsigned char *dest,
                                     int pitch, int stride, uint16_t eobs);

vp9/decoder/vp9_idct_blk.c
index 152527c..ad93b49 100644 (file)
@@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
   for (i = 0; i < 4; i++) {
     for (j = 0; j < 4; j++) {
       if (*eobs++ > 1)
-        vp9_dequant_dc_idct_add_c(q, dq, dst, dst, stride, stride, dc[0]);
+        xd->dc_idct_add(q, dq, dst, dst, stride, stride, dc[0]);
       else
-        vp9_dc_only_idct_add_c(dc[0], dst, dst, stride, stride);
+        xd->dc_only_idct_add(dc[0], dst, dst, stride, stride);
 
       q   += 16;
       dst += 4;
@@ -143,9 +143,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
   for (i = 0; i < 2; i++) {
     for (j = 0; j < 2; j++) {
       if (*eobs++ > 1) {
-        vp9_dequant_idct_add_c(q, dq, dstu, dstu, stride, stride);
+        xd->idct_add(q, dq, dstu, dstu, stride, stride);
       } else {
-        vp9_dc_only_idct_add_c(q[0]*dq[0], dstu, dstu, stride, stride);
+        xd->dc_only_idct_add(q[0]*dq[0], dstu, dstu, stride, stride);
         ((int *)q)[0] = 0;
       }
 
@@ -159,9 +159,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
   for (i = 0; i < 2; i++) {
     for (j = 0; j < 2; j++) {
       if (*eobs++ > 1) {
-        vp9_dequant_idct_add_c(q, dq, dstv, dstv, stride, stride);
+        xd->idct_add(q, dq, dstv, dstv, stride, stride);
       } else {
-        vp9_dc_only_idct_add_c(q[0]*dq[0], dstv, dstv, stride, stride);
+        xd->dc_only_idct_add(q[0]*dq[0], dstv, dstv, stride, stride);
         ((int *)q)[0] = 0;
       }
 

vp9/decoder/vp9_onyxd_int.h
index e04b9f5..0b0b903 100644 (file)
@@ -70,12 +70,6 @@ typedef struct VP9Decompressor {
 
   DETOK detoken;
 
-  vp9_dequant_idct_add_fn_t            idct_add;
-  vp9_dequant_dc_idct_add_fn_t         dc_idct_add;
-  vp9_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
-  vp9_dequant_idct_add_y_block_fn_t    idct_add_y_block;
-  vp9_dequant_idct_add_uv_block_fn_t   idct_add_uv_block;
-
   int refresh_frame_flags;
   vp9_prob prob_skip_false;
 

vp9/encoder/vp9_encodeframe.c
index 927a1b9..1b674f1 100644 (file)
@@ -1543,8 +1543,10 @@ void vp9_encode_frame(VP9_COMP *cpi) {
 
     /* transform size (4x4, 8x8, 16x16 or select-per-mb) selection */
 #if CONFIG_LOSSLESS
+    cpi->mb.e_mbd.lossless = 0;
     if (cpi->oxcf.lossless) {
       txfm_type = ONLY_4X4;
+      cpi->mb.e_mbd.lossless = 1;
     } else
 #endif
     /* FIXME (rbultje)

vp9/encoder/vp9_rdopt.c
index 317209b..762a929 100644 (file)
@@ -698,7 +698,7 @@ static void macro_block_yrd_16x16(MACROBLOCK *mb, int *Rate, int *Distortion,
   // TODO(jingning) is it possible to quickly determine whether to force
   //                trailing coefficients to be zero, instead of running trellis
   //                optimization in the rate-distortion optimization loop?
-  if (mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
+  if (mb->optimize && mb->e_mbd.mode_info_context->mbmi.mode < I8X8_PRED)
     vp9_optimize_mby_16x16(mb);
 
   d = vp9_mbblock_error(mb, 0);