/* Right-shift amount and matching rounding offset. The offset evaluates to
 * (1 << (HORIZONTAL_SHIFT - 1)) - 1, i.e. one less than half of
 * 1 << HORIZONTAL_SHIFT (65535 for a shift of 17).
 * NOTE(review): the trailing "15" looks like a previous shift value -- confirm. */
#define HORIZONTAL_SHIFT 17 // 15
#define HORIZONTAL_ROUNDING ((1 << (HORIZONTAL_SHIFT - 1)) - 1)
void vp9_ihtllm_c(const int16_t *input, int16_t *output, int pitch,
- TX_TYPE tx_type, int tx_dim) {
+ TX_TYPE tx_type, int tx_dim, uint16_t eobs) {
int i, j, k;
+ int nz_dim;
int16_t imbuf[256];
const int16_t *ip = input;
break;
}
+ nz_dim = tx_dim;
+ if(tx_dim > 4) {
+ if(eobs < 36) {
+ vpx_memset(im, 0, 512);
+ nz_dim = 8;
+ if(eobs < 3) {
+ nz_dim = 2;
+ } else if(eobs < 10) {
+ nz_dim = 4;
+ }
+ }
+ }
+
/* vertical transformation */
for (j = 0; j < tx_dim; j++) {
- for (i = 0; i < tx_dim; i++) {
+ for (i = 0; i < nz_dim; i++) {
int temp = 0;
- for (k = 0; k < tx_dim; k++) {
+ for (k = 0; k < nz_dim; k++) {
temp += ptv[k] * ip[(k * tx_dim)];
}
for (i = 0; i < tx_dim; i++) {
int temp = 0;
- for (k = 0; k < tx_dim; k++) {
+ for (k = 0; k < nz_dim; k++) {
temp += im[k] * pthc[k];
}
TX_TYPE tx_type = get_tx_type_4x4(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32,
- tx_type, 4);
+ tx_type, 4, xd->block[i].eob);
} else {
vp9_inverse_transform_b_4x4(xd, i, 32);
}
for (i = 0; i < 9; i += 8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8);
+ vp9_ihtllm(xd->block[i].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+ xd->block[i].eob);
} else {
vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0],
&blockd[i].diff[0], 32);
for (i = 2; i < 11; i += 8) {
TX_TYPE tx_type = get_tx_type_8x8(xd, &xd->block[i]);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8);
+ vp9_ihtllm(xd->block[i + 2].dqcoeff, xd->block[i].diff, 32, tx_type, 8,
+ xd->block[i + 2].eob);
} else {
vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0],
&blockd[i].diff[0], 32);
BLOCKD *bd = &xd->block[0];
TX_TYPE tx_type = get_tx_type_16x16(xd, bd);
if (tx_type != DCT_DCT) {
- vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16);
+ vp9_ihtllm(bd->dqcoeff, bd->diff, 32, tx_type, 16, bd->eob);
} else {
vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0],
&xd->block[0].diff[0], 32);
prototype void vp9_short_idct10_16x16 "short *input, short *output, int pitch"
specialize vp9_short_idct10_16x16
# NOTE(review): the C definition vp9_ihtllm_c touched by this change declares
# its last parameter as `uint16_t eobs`, while the prototype string below uses
# `short eobs` -- confirm the signed/unsigned difference is intended.
-prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim"
+prototype void vp9_ihtllm "const short *input, short *output, int pitch, int tx_type, int tx_dim, short eobs"
specialize vp9_ihtllm
#
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff,
xd->block[0].dequant, xd->predictor,
- xd->dst.y_buffer, 16, xd->dst.y_stride);
+ xd->dst.y_buffer, 16, xd->dst.y_stride,
+ xd->eobs[0]);
} else {
vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant,
xd->predictor, xd->dst.y_buffer,
}
tx_type = get_tx_type_8x8(xd, &xd->block[ib]);
if (tx_type != DCT_DCT) {
- vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride);
+ vp9_ht_dequant_idct_add_8x8_c(tx_type, q, dq, pre, dst, 16, stride,
+ xd->eobs[idx]);
} else {
vp9_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride,
0, xd->eobs[idx]);
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16,
- b->dst_stride);
+ b->dst_stride, b->eob);
} else {
vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
if (tx_type != DCT_DCT) {
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
- *(b->base_dst) + b->dst, 16, b->dst_stride);
+ *(b->base_dst) + b->dst, 16, b->dst_stride,
+ b->eob);
} else {
vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff,
b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16,
- b->dst_stride);
+ b->dst_stride, b->eob);
} else {
vp9_dequant_idct_add(b->qcoeff, b->dequant, b->predictor,
*(b->base_dst) + b->dst, 16, b->dst_stride);
tx_type, xd->qcoeff, xd->block[0].dequant,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
xd->dst.y_buffer + y_idx * 16 * xd->dst.y_stride + x_idx * 16,
- xd->dst.y_stride, xd->dst.y_stride);
+ xd->dst.y_stride, xd->dst.y_stride, xd->block[0].eob);
} else {
vp9_dequant_idct_add_16x16(
xd->qcoeff, xd->block[0].dequant,
+ x_idx * 16 + (i & 1) * 8,
xd->dst.y_buffer + (y_idx * 16 + (i / 2) * 8) * xd->dst.y_stride
+ x_idx * 16 + (i & 1) * 8,
- stride, stride);
+ stride, stride, b->eob);
} else {
vp9_dequant_idct_add_8x8_c(
q, dq,
+ x_idx * 16 + (i & 3) * 4,
xd->dst.y_buffer + (y_idx * 16 + (i / 4) * 4) * xd->dst.y_stride
+ x_idx * 16 + (i & 3) * 4,
- xd->dst.y_stride, xd->dst.y_stride);
+ xd->dst.y_stride, xd->dst.y_stride, b->eob);
} else {
vp9_dequant_idct_add_c(
b->qcoeff, b->dequant,
#include "vp9/decoder/vp9_dequantize.h"
#include "vpx_mem/vpx_mem.h"
#include "vp9/decoder/vp9_onyxd_int.h"
-
static void add_residual(const int16_t *diff, const uint8_t *pred, int pitch,
uint8_t *dest, int stride, int width, int height) {
int r, c;
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride) {
+ int pitch, int stride, uint16_t eobs) {
int16_t output[16];
int16_t *diff_ptr = output;
int i;
input[i] = dq[i] * input[i];
}
- vp9_ihtllm(input, output, 4 << 1, tx_type, 4);
+ vp9_ihtllm(input, output, 4 << 1, tx_type, 4, eobs);
vpx_memset(input, 0, 32);
/*
 * Dequantize an 8x8 coefficient block in place, run the inverse hybrid
 * transform on it and add the resulting residual to the prediction.
 *
 *   tx_type  transform type, forwarded to vp9_ihtllm().
 *   input    64 coefficients; scaled in place and then cleared (128 bytes)
 *            when eobs is nonzero, left untouched when eobs is 0.
 *   dq       dequantization factors: dq[0] scales input[0], dq[1] scales
 *            input[1..63].
 *   pred     prediction; passed with `pitch` to vp9_copy_mem8x8() or
 *            add_residual().
 *   dest     destination; passed with `stride` to the same helpers.
 *   eobs     presumably the end-of-block count for this block (TODO confirm);
 *            0 selects the copy-only path, any other value is forwarded to
 *            vp9_ihtllm().
 */
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq,
uint8_t *pred, uint8_t *dest,
- int pitch, int stride) {
+ int pitch, int stride, uint16_t eobs) {
int16_t output[64];
int16_t *diff_ptr = output;
int i;
+ if (eobs == 0) {
+ /* All DCT coefficients are zero: skip the transform and hand pred to the 8x8 copy helper. */
+ vp9_copy_mem8x8(pred, pitch, dest, stride);
+ } else if (eobs > 0) { /* eobs is unsigned, so this holds for every nonzero value */
+ input[0] = dq[0] * input[0];
+ for (i = 1; i < 64; i++) {
+ input[i] = dq[1] * input[i];
+ }
- input[0] = dq[0] * input[0];
- for (i = 1; i < 64; i++) {
- input[i] = dq[1] * input[i];
- }
-
- vp9_ihtllm(input, output, 16, tx_type, 8);
+ vp9_ihtllm(input, output, 16, tx_type, 8, eobs); /* NOTE(review): 16 looks like the output row pitch in bytes (8 * sizeof(int16_t)) -- confirm */
- vpx_memset(input, 0, 128);
+ vpx_memset(input, 0, 128); /* clear all 64 int16_t coefficients (128 bytes) */
- add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 8, 8);
+ }
}
void vp9_dequant_idct_add_c(int16_t *input, const int16_t *dq, uint8_t *pred,
/*
 * Dequantize a 16x16 coefficient block in place, run the inverse hybrid
 * transform on it and add the resulting residual to the prediction.
 *
 *   tx_type  transform type, forwarded to vp9_ihtllm().
 *   input    256 coefficients; scaled in place and then cleared (512 bytes)
 *            when eobs is nonzero, left untouched when eobs is 0.
 *   dq       dequantization factors: dq[0] scales input[0], dq[1] scales
 *            input[1..255].
 *   pred     prediction; passed with `pitch` to vp9_copy_mem16x16() or
 *            add_residual().
 *   dest     destination; passed with `stride` to the same helpers.
 *   eobs     presumably the end-of-block count for this block (TODO confirm);
 *            0 selects the copy-only path, any other value is forwarded to
 *            vp9_ihtllm().
 */
void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, int16_t *input,
const int16_t *dq, uint8_t *pred,
- uint8_t *dest, int pitch, int stride) {
+ uint8_t *dest, int pitch, int stride,
+ uint16_t eobs) {
int16_t output[256];
int16_t *diff_ptr = output;
int i;
+ if (eobs == 0) {
+ /* All DCT coefficients are zero: skip the transform and hand pred to the 16x16 copy helper. */
+ vp9_copy_mem16x16(pred, pitch, dest, stride);
+ } else if (eobs > 0) { /* eobs is unsigned, so this holds for every nonzero value */
+ input[0]= input[0] * dq[0];
- input[0]= input[0] * dq[0];
-
- // recover quantizer for 4 4x4 blocks
- for (i = 1; i < 256; i++)
- input[i] = input[i] * dq[1];
+ // scale the remaining 255 coefficients by dq[1]
+ for (i = 1; i < 256; i++)
+ input[i] = input[i] * dq[1];
- // inverse hybrid transform
- vp9_ihtllm(input, output, 32, tx_type, 16);
+ // inverse hybrid transform
+ vp9_ihtllm(input, output, 32, tx_type, 16, eobs);
- // the idct halves ( >> 1) the pitch
- // vp9_short_idct16x16_c(input, output, 32);
+ // the idct halves ( >> 1) the pitch
+ // vp9_short_idct16x16_c(input, output, 32);
- vpx_memset(input, 0, 512);
+ vpx_memset(input, 0, 512); // clear all 256 int16_t coefficients (512 bytes)
- add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ add_residual(diff_ptr, pred, pitch, dest, stride, 16, 16);
+ }
}
void vp9_dequant_idct_add_16x16_c(int16_t *input, const int16_t *dq,
/* 4x4 variant: dequantizes `input` in place, applies the inverse hybrid
 * transform selected by `tx_type` and combines the result with `pred` into
 * `dest`. `eobs` is forwarded to vp9_ihtllm().
 * NOTE(review): `eobs` is presumably the block's end-of-block count -- confirm. */
void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, const short *dq,
unsigned char *pred, unsigned char *dest,
- int pitch, int stride);
+ int pitch, int stride, uint16_t eobs);
/* 8x8 variant: when `eobs` is 0 the prediction is copied to `dest` without
 * running the transform; otherwise `input` is dequantized in place, inverse
 * transformed according to `tx_type`, cleared, and the residual is added to
 * `pred` to produce `dest`. */
void vp9_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input,
const short *dq, unsigned char *pred,
- unsigned char *dest, int pitch, int stride);
+ unsigned char *dest, int pitch, int stride,
+ uint16_t eobs);
/* 16x16 variant: when `eobs` is 0 the prediction is copied to `dest` without
 * running the transform; otherwise `input` is dequantized in place, inverse
 * transformed according to `tx_type`, cleared, and the residual is added to
 * `pred` to produce `dest`. */
void vp9_ht_dequant_idct_add_16x16_c(TX_TYPE tx_type, short *input,
const short *dq, unsigned char *pred,
unsigned char *dest,
- int pitch, int stride);
+ int pitch, int stride, uint16_t eobs);
#if CONFIG_SUPERBLOCKS
void vp9_dequant_dc_idct_add_y_block_8x8_inplace_c(short *q, const short *dq,
if (tx_type != DCT_DCT) {
vp9_fht(be->src_diff, 32, be->coeff, tx_type, 4);
vp9_ht_quantize_b_4x4(be, b, tx_type);
- vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4);
+ vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
} else {
x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b) ;
tx_type, 8);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
vp9_ihtllm(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
- tx_type, 8);
+ tx_type, 8, xd->block[idx].eob);
} else {
x->vp9_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
if (tx_type != DCT_DCT) {
vp9_fht_c(be->src_diff, 32, be->coeff, tx_type, 4);
vp9_ht_quantize_b_4x4(be, b, tx_type);
- vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4);
+ vp9_ihtllm(b->dqcoeff, b->diff, 32, tx_type, 4, b->eob);
} else {
x->vp9_short_fdct4x4(be->src_diff, be->coeff, 32);
x->quantize_b_4x4(be, b);
// inverse transform
if (best_tx_type != DCT_DCT)
- vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4);
+ vp9_ihtllm(best_dqcoeff, b->diff, 32, best_tx_type, 4, b->eob);
else
xd->inv_xform4x4_x8(best_dqcoeff, b->diff, 32);