From 17db5d00be2f005f11428edf0fd23265d04d100f Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Mon, 11 Feb 2013 21:14:46 -0800
Subject: [PATCH] enable bitstream lossless support

1. Added a bit in the frame header to indicate if a frame is encoded
in lossless mode, so the decoder does not make the decision based on Q0
2. Minor changes to make sure that lossy coding works the same as when
the lossless experiment is not enabled.
3. Renamed function pointers for transforms to be consistent, using
prefix fwd_txm and inv_txm for forward and inverse respectively

To encode in lossless mode, use "--lossless=1 --min-q=0 --max-q=0"
with vpxenc.

Change-Id: Ifae53b26d2ffbe378d707e29d96817b8a5e6c068
---
 vp9/common/vp9_blockd.h       |  22 ++++----
 vp9/common/vp9_idctllm.c      |   7 +--
 vp9/common/vp9_invtrans.c     |   6 +--
 vp9/decoder/vp9_decodframe.c  | 120 ++++++++++++++++++++----------------------
 vp9/decoder/vp9_idct_blk.c    |  12 ++---
 vp9/encoder/vp9_bitstream.c   |   8 ++-
 vp9/encoder/vp9_block.h       |  12 ++---
 vp9/encoder/vp9_encodeintra.c |  10 ++--
 vp9/encoder/vp9_encodemb.c    |  22 ++++----
 vp9/encoder/vp9_onyx_if.c     |  48 ++++++++---------
 vp9/encoder/vp9_quantize.c    |  12 ++---
 vp9/encoder/vp9_rdopt.c       |  20 +++----
 12 files changed, 147 insertions(+), 152 deletions(-)

diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h
index b0c1bfa..5c4c238 100644
--- a/vp9/common/vp9_blockd.h
+++ b/vp9/common/vp9_blockd.h
@@ -390,22 +390,22 @@ typedef struct macroblockd {
   int lossless;
 #endif
   /* Inverse transform function pointers. */
-  void (*inv_xform4x4_1_x8)(int16_t *input, int16_t *output, int pitch);
-  void (*inv_xform4x4_x8)(int16_t *input, int16_t *output, int pitch);
-  void (*inv_walsh4x4_1)(int16_t *in, int16_t *out);
-  void (*inv_walsh4x4_lossless)(int16_t *in, int16_t *out);
-  void (*idct_add)(int16_t *input, const int16_t *dq,
+  void (*inv_txm4x4_1)(int16_t *input, int16_t *output, int pitch);
+  void (*inv_txm4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*inv_2ndtxm4x4_1)(int16_t *in, int16_t *out);
+  void (*inv_2ndtxm4x4)(int16_t *in, int16_t *out);
+  void (*itxm_add)(int16_t *input, const int16_t *dq,
     uint8_t *pred, uint8_t *output, int pitch, int stride);
-  void (*dc_idct_add)(int16_t *input, const int16_t *dq,
+  void (*dc_itxm_add)(int16_t *input, const int16_t *dq,
     uint8_t *pred, uint8_t *output, int pitch, int stride, int dc);
-  void (*dc_only_idct_add)(int input_dc, uint8_t *pred_ptr,
+  void (*dc_only_itxm_add)(int input_dc, uint8_t *pred_ptr,
     uint8_t *dst_ptr, int pitch, int stride);
-  void (*dc_idct_add_y_block)(int16_t *q, const int16_t *dq,
+  void (*dc_itxm_add_y_block)(int16_t *q, const int16_t *dq,
     uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs,
     const int16_t *dc);
-  void (*idct_add_y_block)(int16_t *q, const int16_t *dq,
+  void (*itxm_add_y_block)(int16_t *q, const int16_t *dq,
     uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs);
-  void (*idct_add_uv_block)(int16_t *q, const int16_t *dq,
+  void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq,
     uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride,
     uint16_t *eobs);
 
@@ -520,7 +520,7 @@ static TX_TYPE get_tx_type_4x4(const MACROBLOCKD *xd, const BLOCKD *b) {
     return tx_type;
 #if CONFIG_LOSSLESS
   if (xd->lossless)
-    return tx_type;
+    return DCT_DCT;
 #endif
   // TODO(rbultje, debargha): Explore ADST usage for superblocks
   if (xd->mode_info_context->mbmi.sb_type)
diff --git a/vp9/common/vp9_idctllm.c b/vp9/common/vp9_idctllm.c
index b27b34c..5afce6e 100644
--- a/vp9/common/vp9_idctllm.c
+++ b/vp9/common/vp9_idctllm.c
@@ -476,12 +476,13 @@ void vp9_short_inv_walsh4x4_1_x8_c(int16_t *in, int16_t *out, int pitch) {
   }
 }
 
-void vp9_dc_only_inv_walsh_add_c(short input_dc, uint8_t *pred_ptr,
+void vp9_dc_only_inv_walsh_add_c(int input_dc, uint8_t *pred_ptr,
                                  uint8_t *dst_ptr,
                                  int pitch, int stride) {
   int r, c;
-  short tmp[16];
-  vp9_short_inv_walsh4x4_1_x8_c(&input_dc, tmp, 4 << 1);
+  int16_t dc = input_dc;
+  int16_t tmp[16];
+  vp9_short_inv_walsh4x4_1_x8_c(&dc, tmp, 4 << 1);
 
   for (r = 0; r < 4; r++) {
     for (c = 0; c < 4; c++) {
diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c
index 241a5bc..94cfb62 100644
--- a/vp9/common/vp9_invtrans.c
+++ b/vp9/common/vp9_invtrans.c
@@ -32,9 +32,9 @@ static void recon_dcblock_8x8(MACROBLOCKD *xd) {
 void vp9_inverse_transform_b_4x4(MACROBLOCKD *xd, int block, int pitch) {
   BLOCKD *b = &xd->block[block];
   if (b->eob <= 1)
-    xd->inv_xform4x4_1_x8(b->dqcoeff, b->diff, pitch);
+    xd->inv_txm4x4_1(b->dqcoeff, b->diff, pitch);
   else
-    xd->inv_xform4x4_x8(b->dqcoeff, b->diff, pitch);
+    xd->inv_txm4x4(b->dqcoeff, b->diff, pitch);
 }
 
 void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
@@ -44,7 +44,7 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) {
 
   if (has_2nd_order) {
     /* do 2nd order transform on the dc block */
-    xd->inv_walsh4x4_lossless(blockd[24].dqcoeff, blockd[24].diff);
+    xd->inv_2ndtxm4x4(blockd[24].dqcoeff, blockd[24].diff);
     recon_dcblock(xd);
   }
 
diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c
index f103937..189a2f4 100644
--- a/vp9/decoder/vp9_decodframe.c
+++ b/vp9/decoder/vp9_decodframe.c
@@ -123,43 +123,30 @@ static void mb_init_dequantizer(VP9D_COMP *pbi, MACROBLOCKD *xd) {
     xd->block[i].dequant = pc->Y1dequant[QIndex];
   }
 
+  xd->inv_txm4x4_1        = vp9_short_idct4x4llm_1;
+  xd->inv_txm4x4          = vp9_short_idct4x4llm;
+  xd->inv_2ndtxm4x4_1     = vp9_short_inv_walsh4x4_1;
+  xd->inv_2ndtxm4x4       = vp9_short_inv_walsh4x4;
+  xd->itxm_add            = vp9_dequant_idct_add;
+  xd->dc_only_itxm_add    = vp9_dc_only_idct_add_c;
+  xd->dc_itxm_add         = vp9_dequant_dc_idct_add;
+  xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block;
+  xd->itxm_add_y_block    = vp9_dequant_idct_add_y_block;
+  xd->itxm_add_uv_block   = vp9_dequant_idct_add_uv_block;
 #if CONFIG_LOSSLESS
-  pbi->mb.lossless = 0;
-  if (!QIndex) {
-    pbi->mb.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
-    pbi->mb.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
-    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
-    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
-    pbi->mb.idct_add            = vp9_dequant_idct_add_lossless_c;
-    pbi->mb.dc_only_idct_add    = vp9_dc_only_inv_walsh_add_c;
-    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add_lossless_c;
-    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
-    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
-    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
-    pbi->mb.lossless = 1;
-  } else {
-    pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
-    pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
-    pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
-    pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-    pbi->mb.idct_add            = vp9_dequant_idct_add;
-    pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;
-    pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;
-    pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
-    pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;
-    pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
+  if (xd->lossless) {
+    assert(QIndex == 0);
+    xd->inv_txm4x4_1        = vp9_short_inv_walsh4x4_1_x8;
+    xd->inv_txm4x4          = vp9_short_inv_walsh4x4_x8;
+    xd->inv_2ndtxm4x4_1     = vp9_short_inv_walsh4x4_1_lossless;
+    xd->inv_2ndtxm4x4       = vp9_short_inv_walsh4x4_lossless;
+    xd->itxm_add            = vp9_dequant_idct_add_lossless_c;
+    xd->dc_only_itxm_add    = vp9_dc_only_inv_walsh_add_c;
+    xd->dc_itxm_add         = vp9_dequant_dc_idct_add_lossless_c;
+    xd->dc_itxm_add_y_block = vp9_dequant_dc_idct_add_y_block_lossless_c;
+    xd->itxm_add_y_block    = vp9_dequant_idct_add_y_block_lossless_c;
+    xd->itxm_add_uv_block   = vp9_dequant_idct_add_uv_block_lossless_c;
   }
-#else
-  pbi->mb.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
-  pbi->mb.inv_xform4x4_x8       = vp9_short_idct4x4llm;
-  pbi->mb.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
-  pbi->mb.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
-  pbi->mb.idct_add            = vp9_dequant_idct_add;
-  pbi->mb.dc_only_idct_add    = vp9_dc_only_idct_add_c;
-  pbi->mb.dc_idct_add         = vp9_dequant_dc_idct_add;
-  pbi->mb.dc_idct_add_y_block = vp9_dequant_dc_idct_add_y_block;
-  pbi->mb.idct_add_y_block    = vp9_dequant_idct_add_y_block;
-  pbi->mb.idct_add_uv_block   = vp9_dequant_idct_add_uv_block;
 #endif
 
   for (i = 16; i < 24; i++) {
@@ -349,15 +336,15 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd,
       int i8x8mode = b->bmi.as_mode.first;
       b = &xd->block[16 + i];
       vp9_intra_uv4x4_predict(xd, &xd->block[16 + i], i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
       b = &xd->block[20 + i];
       vp9_intra_uv4x4_predict(xd, &xd->block[20 + i], i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
     }
   } else if (xd->mode_info_context->mbmi.mode == SPLITMV) {
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant,
          xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer,
          xd->dst.uv_stride, xd->eobs + 16);
   } else {
@@ -404,17 +391,17 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                     *(b->base_dst) + b->dst, 16,
                                     b->dst_stride, b->eob);
         } else {
-          xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                         *(b->base_dst) + b->dst, 16, b->dst_stride);
         }
       }
       b = &xd->block[16 + i];
       vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
       b = &xd->block[20 + i];
       vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor);
-      xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+      xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                     *(b->base_dst) + b->dst, 8, b->dst_stride);
     }
   } else if (mode == B_PRED) {
@@ -438,7 +425,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                   *(b->base_dst) + b->dst, 16, b->dst_stride,
                                   b->eob);
       } else {
-        xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+        xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                       *(b->base_dst) + b->dst, 16, b->dst_stride);
       }
     }
@@ -448,7 +435,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
     xd->above_context->y2 = 0;
     xd->left_context->y2 = 0;
     vp9_build_intra_predictors_mbuv(xd);
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
                            xd->block[16].dequant,
                            xd->predictor + 16 * 16,
                            xd->dst.u_buffer,
@@ -457,13 +444,13 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                            xd->eobs + 16);
   } else if (mode == SPLITMV) {
     assert(get_2nd_order_usage(xd) == 0);
-    xd->idct_add_y_block(xd->qcoeff,
+    xd->itxm_add_y_block(xd->qcoeff,
                           xd->block[0].dequant,
                           xd->predictor,
                           xd->dst.y_buffer,
                           xd->dst.y_stride,
                           xd->eobs);
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
                            xd->block[16].dequant,
                            xd->predictor + 16 * 16,
                            xd->dst.u_buffer,
@@ -500,7 +487,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                     *(b->base_dst) + b->dst, 16,
                                     b->dst_stride, b->eob);
         } else {
-          xd->idct_add(b->qcoeff, b->dequant, b->predictor,
+          xd->itxm_add(b->qcoeff, b->dequant, b->predictor,
                         *(b->base_dst) + b->dst, 16, b->dst_stride);
         }
       }
@@ -509,7 +496,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
       assert(get_2nd_order_usage(xd) == 1);
       vp9_dequantize_b(b);
       if (xd->eobs[24] > 1) {
-        xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
+        xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
         ((int *)b->qcoeff)[1] = 0;
         ((int *)b->qcoeff)[2] = 0;
@@ -519,11 +506,11 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
         ((int *)b->qcoeff)[6] = 0;
         ((int *)b->qcoeff)[7] = 0;
       } else {
-        xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+        xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
         ((int *)b->qcoeff)[0] = 0;
       }
       vp9_dequantize_b(b);
-      xd->dc_idct_add_y_block(xd->qcoeff,
+      xd->dc_itxm_add_y_block(xd->qcoeff,
                                xd->block[0].dequant,
                                xd->predictor,
                                xd->dst.y_buffer,
@@ -531,7 +518,7 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd,
                                xd->eobs,
                                xd->block[24].diff);
     }
-    xd->idct_add_uv_block(xd->qcoeff + 16 * 16,
+    xd->itxm_add_uv_block(xd->qcoeff + 16 * 16,
                            xd->block[16].dequant,
                            xd->predictor + 16 * 16,
                            xd->dst.u_buffer,
@@ -649,7 +636,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
             + x_idx * 16 + (i & 3) * 4,
             xd->dst.y_stride, xd->dst.y_stride, b->eob);
       } else {
-        xd->idct_add(
+        xd->itxm_add(
             b->qcoeff, b->dequant,
             xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
             + x_idx * 16 + (i & 3) * 4,
@@ -661,7 +648,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
   } else {
     vp9_dequantize_b(b);
     if (xd->eobs[24] > 1) {
-      xd->inv_walsh4x4_lossless(&b->dqcoeff[0], b->diff);
+      xd->inv_2ndtxm4x4(&b->dqcoeff[0], b->diff);
       ((int *)b->qcoeff)[0] = 0;
       ((int *)b->qcoeff)[1] = 0;
       ((int *)b->qcoeff)[2] = 0;
@@ -671,7 +658,7 @@ static void decode_4x4_sb(VP9D_COMP *pbi, MACROBLOCKD *xd,
       ((int *)b->qcoeff)[6] = 0;
       ((int *)b->qcoeff)[7] = 0;
     } else {
-      xd->inv_walsh4x4_1(&b->dqcoeff[0], b->diff);
+      xd->inv_2ndtxm4x4_1(&b->dqcoeff[0], b->diff);
       ((int *)b->qcoeff)[0] = 0;
     }
     vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(
@@ -1533,17 +1520,24 @@ int vp9_decode_frame(VP9D_COMP *pbi, const unsigned char **p_data_end) {
 
   pc->sb64_coded = vp9_read_literal(&header_bc, 8);
   pc->sb32_coded = vp9_read_literal(&header_bc, 8);
-
-  /* Read the loop filter level and type */
-  pc->txfm_mode = vp9_read_literal(&header_bc, 2);
-  if (pc->txfm_mode == 3)
-    pc->txfm_mode += vp9_read_bit(&header_bc);
-  if (pc->txfm_mode == TX_MODE_SELECT) {
-    pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
-    pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
-    pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+#if CONFIG_LOSSLESS
+  xd->lossless = vp9_read_bit(&header_bc);
+  if (xd->lossless) {
+    pc->txfm_mode = ONLY_4X4;
+  }
+  else
+#endif
+  {
+    /* Read the loop filter level and type */
+    pc->txfm_mode = vp9_read_literal(&header_bc, 2);
+    if (pc->txfm_mode == 3)
+      pc->txfm_mode += vp9_read_bit(&header_bc);
+    if (pc->txfm_mode == TX_MODE_SELECT) {
+      pc->prob_tx[0] = vp9_read_literal(&header_bc, 8);
+      pc->prob_tx[1] = vp9_read_literal(&header_bc, 8);
+      pc->prob_tx[2] = vp9_read_literal(&header_bc, 8);
+    }
   }
-
   pc->filter_type = (LOOPFILTERTYPE) vp9_read_bit(&header_bc);
   pc->filter_level = vp9_read_literal(&header_bc, 6);
   pc->sharpness_level = vp9_read_literal(&header_bc, 3);
diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c
index ad93b49..c3f1464 100644
--- a/vp9/decoder/vp9_idct_blk.c
+++ b/vp9/decoder/vp9_idct_blk.c
@@ -51,9 +51,9 @@ void vp9_dequant_dc_idct_add_y_block_4x4_inplace_c(int16_t *q,
   for (i = 0; i < 4; i++) {
     for (j = 0; j < 4; j++) {
       if (*eobs++ > 1)
-        xd->dc_idct_add(q, dq, dst, dst, stride, stride, dc[0]);
+        xd->dc_itxm_add(q, dq, dst, dst, stride, stride, dc[0]);
       else
-        xd->dc_only_idct_add(dc[0], dst, dst, stride, stride);
+        xd->dc_only_itxm_add(dc[0], dst, dst, stride, stride);
 
       q   += 16;
       dst += 4;
@@ -143,9 +143,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
   for (i = 0; i < 2; i++) {
     for (j = 0; j < 2; j++) {
       if (*eobs++ > 1) {
-        xd->idct_add(q, dq, dstu, dstu, stride, stride);
+        xd->itxm_add(q, dq, dstu, dstu, stride, stride);
       } else {
-        xd->dc_only_idct_add(q[0]*dq[0], dstu, dstu, stride, stride);
+        xd->dc_only_itxm_add(q[0]*dq[0], dstu, dstu, stride, stride);
         ((int *)q)[0] = 0;
       }
 
@@ -159,9 +159,9 @@ void vp9_dequant_idct_add_uv_block_4x4_inplace_c(int16_t *q, const int16_t *dq,
   for (i = 0; i < 2; i++) {
     for (j = 0; j < 2; j++) {
       if (*eobs++ > 1) {
-        xd->idct_add(q, dq, dstv, dstv, stride, stride);
+        xd->itxm_add(q, dq, dstv, dstv, stride, stride);
       } else {
-        xd->dc_only_idct_add(q[0]*dq[0], dstv, dstv, stride, stride);
+        xd->dc_only_itxm_add(q[0]*dq[0], dstv, dstv, stride, stride);
         ((int *)q)[0] = 0;
       }
 
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index a3c4078..89a1c2c 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1667,7 +1667,13 @@ void vp9_pack_bitstream(VP9_COMP *cpi, unsigned char *dest,
   vp9_write_literal(&header_bc, pc->sb64_coded, 8);
   pc->sb32_coded = get_binary_prob(cpi->sb32_count[0], cpi->sb32_count[1]);
   vp9_write_literal(&header_bc, pc->sb32_coded, 8);
-
+#if CONFIG_LOSSLESS
+  vp9_write_bit(&header_bc, cpi->oxcf.lossless);
+  if (cpi->oxcf.lossless) {
+    pc->txfm_mode = ONLY_4X4;
+  }
+  else
+#endif
   {
     if (pc->txfm_mode == TX_MODE_SELECT) {
       pc->prob_tx[0] = get_prob(cpi->txfm_count_32x32p[TX_4X4] +
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index 91d4c45..d5110c8 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -169,14 +169,14 @@ typedef struct macroblock {
   PICK_MODE_CONTEXT sb32_context[4];
   PICK_MODE_CONTEXT sb64_context;
 
-  void (*vp9_short_fdct4x4)(int16_t *input, int16_t *output, int pitch);
-  void (*vp9_short_fdct8x4)(int16_t *input, int16_t *output, int pitch);
-  void (*short_walsh4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_2ndtxm4x4)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
+  void (*fwd_2ndtxm2x2)(int16_t *input, int16_t *output, int pitch);
   void (*quantize_b_4x4)(BLOCK *b, BLOCKD *d);
   void (*quantize_b_4x4_pair)(BLOCK *b1, BLOCK *b2, BLOCKD *d0, BLOCKD *d1);
-  void (*vp9_short_fdct8x8)(int16_t *input, int16_t *output, int pitch);
-  void (*vp9_short_fdct16x16)(int16_t *input, int16_t *output, int pitch);
-  void (*short_fhaar2x2)(int16_t *input, int16_t *output, int pitch);
   void (*quantize_b_16x16)(BLOCK *b, BLOCKD *d);
   void (*quantize_b_8x8)(BLOCK *b, BLOCKD *d);
   void (*quantize_b_2x2)(BLOCK *b, BLOCKD *d);
diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c
index fa72297..62afb71 100644
--- a/vp9/encoder/vp9_encodeintra.c
+++ b/vp9/encoder/vp9_encodeintra.c
@@ -58,7 +58,7 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) {
     vp9_ht_quantize_b_4x4(be, b, tx_type);
     vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
   } else {
-    x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+    x->fwd_txm4x4(be->src_diff, be->coeff, 32);
     x->quantize_b_4x4(be, b) ;
     vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 32);
   }
@@ -161,7 +161,7 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
                    tx_type, 8, xd->block[idx].eob);
 #endif
     } else {
-      x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+      x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
       vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
     }
@@ -175,13 +175,13 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) {
         vp9_ht_quantize_b_4x4(be, b, tx_type);
         vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
       } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
-        x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+        x->fwd_txm8x4(be->src_diff, be->coeff, 32);
         x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
         vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
         vp9_inverse_transform_b_4x4(xd, ib + iblock[i] + 1, 32);
         i++;
       } else {
-        x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+        x->fwd_txm4x4(be->src_diff, be->coeff, 32);
         x->quantize_b_4x4(be, b);
         vp9_inverse_transform_b_4x4(xd, ib + iblock[i], 32);
       }
@@ -214,7 +214,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib,
 
   vp9_subtract_b(be, b, 8);
 
-  x->vp9_short_fdct4x4(be->src_diff, be->coeff, 16);
+  x->fwd_txm4x4(be->src_diff, be->coeff, 16);
   x->quantize_b_4x4(be, b);
   vp9_inverse_transform_b_4x4(&x->e_mbd, ib, 16);
 
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index 52eabf1..fad55f7 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -188,11 +188,11 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
       assert(has_2nd_order == 0);
       vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 4);
     } else if (!(i & 1) && get_tx_type_4x4(xd, &xd->block[i + 1]) == DCT_DCT) {
-      x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+      x->fwd_txm8x4(&x->block[i].src_diff[0],
                            &x->block[i].coeff[0], 32);
       i++;
     } else {
-      x->vp9_short_fdct4x4(&x->block[i].src_diff[0],
+      x->fwd_txm4x4(&x->block[i].src_diff[0],
                            &x->block[i].coeff[0], 32);
     }
   }
@@ -202,7 +202,7 @@ void vp9_transform_mby_4x4(MACROBLOCK *x) {
     build_dcblock_4x4(x);
 
     // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0],
+    x->fwd_2ndtxm4x4(&x->block[24].src_diff[0],
                       &x->block[24].coeff[0], 8);
   } else {
     vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@@ -213,7 +213,7 @@ void vp9_transform_mbuv_4x4(MACROBLOCK *x) {
   int i;
 
   for (i = 16; i < 24; i += 2) {
-    x->vp9_short_fdct8x4(&x->block[i].src_diff[0],
+    x->fwd_txm8x4(&x->block[i].src_diff[0],
                          &x->block[i].coeff[0], 16);
   }
 }
@@ -253,7 +253,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
       assert(has_2nd_order == 0);
       vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 8);
     } else {
-      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+      x->fwd_txm8x8(&x->block[i].src_diff[0],
                            &x->block[i].coeff[0], 32);
     }
   }
@@ -264,7 +264,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
       assert(has_2nd_order == 0);
       vp9_fht_c(b->src_diff, 32, (b + 2)->coeff, tx_type, 8);
     } else {
-      x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+      x->fwd_txm8x8(&x->block[i].src_diff[0],
                            &x->block[i + 2].coeff[0], 32);
     }
   }
@@ -274,7 +274,7 @@ void vp9_transform_mby_8x8(MACROBLOCK *x) {
     build_dcblock_8x8(x);
 
     // do 2nd order transform on the dc block
-    x->short_fhaar2x2(&x->block[24].src_diff[0],
+    x->fwd_2ndtxm2x2(&x->block[24].src_diff[0],
                       &x->block[24].coeff[0], 8);
   } else {
     vpx_memset(x->block[24].coeff, 0, 16 * sizeof(x->block[24].coeff[0]));
@@ -285,7 +285,7 @@ void vp9_transform_mbuv_8x8(MACROBLOCK *x) {
   int i;
 
   for (i = 16; i < 24; i += 4) {
-    x->vp9_short_fdct8x8(&x->block[i].src_diff[0],
+    x->fwd_txm8x8(&x->block[i].src_diff[0],
                          &x->block[i].coeff[0], 16);
   }
 }
@@ -303,7 +303,7 @@ void vp9_transform_mby_16x16(MACROBLOCK *x) {
   if (tx_type != DCT_DCT) {
     vp9_fht_c(b->src_diff, 32, b->coeff, tx_type, 16);
   } else {
-    x->vp9_short_fdct16x16(&x->block[0].src_diff[0],
+    x->fwd_txm16x16(&x->block[0].src_diff[0],
                            &x->block[0].coeff[0], 32);
   }
 }
@@ -321,9 +321,9 @@ void vp9_transform_sby_32x32(MACROBLOCK *x) {
 void vp9_transform_sbuv_16x16(MACROBLOCK *x) {
   SUPERBLOCK * const x_sb = &x->sb_coeff_data;
   vp9_clear_system_state();
-  x->vp9_short_fdct16x16(x_sb->src_diff + 1024,
+  x->fwd_txm16x16(x_sb->src_diff + 1024,
                          x_sb->coeff + 1024, 32);
-  x->vp9_short_fdct16x16(x_sb->src_diff + 1280,
+  x->fwd_txm16x16(x_sb->src_diff + 1280,
                          x_sb->coeff + 1280, 32);
 }
 
diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c
index ad5fe78..970b9e4 100644
--- a/vp9/encoder/vp9_onyx_if.c
+++ b/vp9/encoder/vp9_onyx_if.c
@@ -752,10 +752,10 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
   sf->quarter_pixel_search = 1;
   sf->half_pixel_search = 1;
   sf->iterative_sub_pixel = 1;
-#if CONFIG_LOSSLESS
-  sf->optimize_coefficients = 0;
-#else
   sf->optimize_coefficients = 1;
+#if CONFIG_LOSSLESS
+  if (cpi->oxcf.lossless)
+    sf->optimize_coefficients = 0;
 #endif
   sf->no_skip_block4x4_search = 1;
   sf->first_step = 0;
@@ -840,20 +840,18 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
     }
   }
 
-  cpi->mb.vp9_short_fdct16x16 = vp9_short_fdct16x16;
-  cpi->mb.vp9_short_fdct8x8 = vp9_short_fdct8x8;
-  cpi->mb.vp9_short_fdct8x4 = vp9_short_fdct8x4;
-  cpi->mb.vp9_short_fdct4x4 = vp9_short_fdct4x4;
-  cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
-  cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
+  cpi->mb.fwd_txm16x16  = vp9_short_fdct16x16;
+  cpi->mb.fwd_txm8x8    = vp9_short_fdct8x8;
+  cpi->mb.fwd_txm8x4    = vp9_short_fdct8x4;
+  cpi->mb.fwd_txm4x4    = vp9_short_fdct4x4;
+  cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4;
+  cpi->mb.fwd_2ndtxm2x2 = vp9_short_fhaar2x2;
 
 #if CONFIG_LOSSLESS
   if (cpi->oxcf.lossless) {
-    cpi->mb.vp9_short_fdct8x4 = vp9_short_walsh8x4_x8;
-    cpi->mb.vp9_short_fdct4x4 = vp9_short_walsh4x4_x8;
-    cpi->mb.short_walsh4x4 = vp9_short_walsh4x4;
-    cpi->mb.short_fhaar2x2 = vp9_short_fhaar2x2;
-    cpi->mb.short_walsh4x4 = vp9_short_walsh4x4_lossless;
+    cpi->mb.fwd_txm8x4    = vp9_short_walsh8x4_x8;
+    cpi->mb.fwd_txm4x4    = vp9_short_walsh4x4_x8;
+    cpi->mb.fwd_2ndtxm4x4 = vp9_short_walsh4x4_lossless;
   }
 #endif
 
@@ -1206,18 +1204,18 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
   cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q];
   cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level];
 
-  cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_idct4x4llm_1;
-  cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_idct4x4llm;
-  cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1;
-  cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4;
+  cpi->mb.e_mbd.inv_txm4x4_1    = vp9_short_idct4x4llm_1;
+  cpi->mb.e_mbd.inv_txm4x4      = vp9_short_idct4x4llm;
+  cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1;
+  cpi->mb.e_mbd.inv_2ndtxm4x4   = vp9_short_inv_walsh4x4;
 
 #if CONFIG_LOSSLESS
   cpi->oxcf.lossless = oxcf->lossless;
   if (cpi->oxcf.lossless) {
-    cpi->mb.e_mbd.inv_xform4x4_1_x8     = vp9_short_inv_walsh4x4_1_x8;
-    cpi->mb.e_mbd.inv_xform4x4_x8       = vp9_short_inv_walsh4x4_x8;
-    cpi->mb.e_mbd.inv_walsh4x4_1        = vp9_short_inv_walsh4x4_1_lossless;
-    cpi->mb.e_mbd.inv_walsh4x4_lossless = vp9_short_inv_walsh4x4_lossless;
+    cpi->mb.e_mbd.inv_txm4x4_1    = vp9_short_inv_walsh4x4_1_x8;
+    cpi->mb.e_mbd.inv_txm4x4      = vp9_short_inv_walsh4x4_x8;
+    cpi->mb.e_mbd.inv_2ndtxm4x4_1 = vp9_short_inv_walsh4x4_1_lossless;
+    cpi->mb.e_mbd.inv_2ndtxm4x4   = vp9_short_inv_walsh4x4_lossless;
   }
 #endif
 
@@ -2619,10 +2617,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // For 2 Pass Only used where GF/ARF prediction quality
   // is above a threshold
   cpi->zbin_mode_boost = 0;
-#if CONFIG_LOSSLESS
-  cpi->zbin_mode_boost_enabled = FALSE;
-#else
   cpi->zbin_mode_boost_enabled = TRUE;
+#if CONFIG_LOSSLESS
+  if (cpi->oxcf.lossless)
+    cpi->zbin_mode_boost_enabled = FALSE;
 #endif
   if (cpi->gfu_boost <= 400) {
     cpi->zbin_mode_boost_enabled = FALSE;
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index b5dbef0..9b99016 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -460,18 +460,14 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
   static const int zbin_boost[16] = { 0,  0,  0,  8,  8,  8, 10, 12,
                                      14, 16, 20, 24, 28, 32, 36, 40 };
 
-
-  int qrounding_factor = 48;
-
   for (Q = 0; Q < QINDEX_RANGE; Q++) {
     int qzbin_factor = (vp9_dc_quant(Q, 0) < 148) ? 84 : 80;
 
+    int qrounding_factor = 48;
 #if CONFIG_LOSSLESS
-    if (cpi->oxcf.lossless) {
-      if (Q == 0) {
-        qzbin_factor = 64;
-        qrounding_factor = 64;
-      }
+    if (cpi->oxcf.lossless && Q == 0) {
+      qzbin_factor = 64;
+      qrounding_factor = 64;
     }
 #endif
 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 762a929..05277c5 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -1140,7 +1140,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
       vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
       vp9_ht_quantize_b_4x4(be, b, tx_type);
     } else {
-      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
       x->quantize_b_4x4(be, b);
     }
 
@@ -1172,7 +1172,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be,
   if (best_tx_type != DCT_DCT)
     vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
   else
-    xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);
+    xd->inv_txm4x4(best_dqcoeff, b->diff, 32);
 
   vp9_recon_b(best_predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
@@ -1436,7 +1436,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
       if (tx_type != DCT_DCT)
         vp9_fht(be->src_diff, 32, (x->block + idx)->coeff, tx_type, 8);
       else
-        x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+        x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32);
       x->quantize_b_8x8(x->block + idx, xd->block + idx);
 
       // compute quantization mse of 8x8 block
@@ -1470,11 +1470,11 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
           vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
           vp9_ht_quantize_b_4x4(be, b, tx_type);
         } else if (!(i & 1) && get_tx_type_4x4(xd, b + 1) == DCT_DCT) {
-          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+          x->fwd_txm8x4(be->src_diff, be->coeff, 32);
           x->quantize_b_4x4_pair(be, be + 1, b, b + 1);
           do_two = 1;
         } else {
-          x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+          x->fwd_txm4x4(be->src_diff, be->coeff, 32);
           x->quantize_b_4x4(be, b);
         }
         distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two);
@@ -2244,7 +2244,7 @@ static int64_t encode_inter_mb_segment(MACROBLOCK *x,
       if (xd->mode_info_context->mbmi.second_ref_frame > 0)
         vp9_build_2nd_inter_predictors_b(bd, 16, &xd->subpix);
       vp9_subtract_b(be, bd, 16);
-      x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
+      x->fwd_txm4x4(be->src_diff, be->coeff, 32);
       x->quantize_b_4x4(be, bd);
       thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16);
       *distortion += thisdistortion;
@@ -2296,7 +2296,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
 
       if (xd->mode_info_context->mbmi.txfm_size == TX_4X4) {
         if (otherrd) {
-          x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+          x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
           x->quantize_b_8x8(be2, bd2);
           thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
           otherdist += thisdistortion;
@@ -2308,7 +2308,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
         for (j = 0; j < 4; j += 2) {
           bd = &xd->block[ib + iblock[j]];
           be = &x->block[ib + iblock[j]];
-          x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+          x->fwd_txm8x4(be->src_diff, be->coeff, 32);
           x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
           thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
           *distortion += thisdistortion;
@@ -2326,7 +2326,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
           for (j = 0; j < 4; j += 2) {
             BLOCKD *bd = &xd->block[ib + iblock[j]];
             BLOCK *be = &x->block[ib + iblock[j]];
-            x->vp9_short_fdct8x4(be->src_diff, be->coeff, 32);
+            x->fwd_txm8x4(be->src_diff, be->coeff, 32);
             x->quantize_b_4x4_pair(be, be + 1, bd, bd + 1);
             thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32);
             otherdist += thisdistortion;
@@ -2340,7 +2340,7 @@ static int64_t encode_inter_mb_segment_8x8(MACROBLOCK *x,
                            TX_4X4);
           }
         }
-        x->vp9_short_fdct8x8(be->src_diff, be2->coeff, 32);
+        x->fwd_txm8x8(be->src_diff, be2->coeff, 32);
         x->quantize_b_8x8(be2, bd2);
         thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64);
         *distortion += thisdistortion;
-- 
2.7.4