From 5d7c8130b80ffc27fc73a2ca1a55352d4b4e34c4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Sun, 15 May 2011 14:53:13 +0200 Subject: [PATCH] [g3dvl] pre apply zscan to quant matrix --- src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 119 +++++++++++-------------- 1 file changed, 51 insertions(+), 68 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index a7aa5cd..8854988 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -721,11 +721,9 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs) } static inline void -get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - const int scan[64], int quantizer_scale, short *dest) +get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) { - int i, j, val; - uint8_t *quant_matrix = picture->intra_quantizer_matrix; + int i, val; int mismatch; const DCTtab *tab; @@ -744,11 +742,9 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi break; /* end of block */ normal_code: - j = scan[i]; - bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); @@ -776,11 +772,9 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - j = scan[i]; - vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); - val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16; + val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16; SATURATE (val); dest[i] = val; @@ -822,11 +816,9 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi } static inline void -get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - const int scan[64], int quantizer_scale, short *dest) +get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) { - int i, j, val; - uint8_t *quant_matrix = picture->intra_quantizer_matrix; + int i, val; int mismatch; const DCTtab * tab; @@ -844,10 +836,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi if (i < 64) { normal_code: - j = scan[i]; bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); @@ -874,11 +865,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi if (i >= 64) break; /* illegal, check against buffer overflow */ - j = scan[i]; - vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); - val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[j]) / 16; + val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16; SATURATE (val); dest[i] = val; @@ -921,11 +910,9 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi } static inline void -get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - const int scan[64], int quantizer_scale, short *dest) +get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) { - int i, j, val; - uint8_t *quant_matrix = picture->non_intra_quantizer_matrix; + int i, val; int mismatch; const DCTtab *tab; @@ -950,10 +937,9 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi break; /* end of block */ normal_code: - j = scan[i]; bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5; /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); @@ -984,12 +970,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - j = scan[i]; - vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; + val = (val * quantizer_scale * quant_matrix[i]) / 32; SATURATE (val); dest[i] = val; @@ -1030,11 +1014,9 @@ get_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi } static inline void -get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - const int scan[64], int quantizer_scale, short *dest) +get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) { - int i, j, val; - uint8_t *quant_matrix = picture->intra_quantizer_matrix; + int i, val; const DCTtab * tab; i = 0; @@ -1051,10 +1033,9 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * break; /* end of block */ normal_code: - j = scan[i]; bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; /* oddification */ val = (val - 1) | 1; @@ -1084,8 +1065,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - j = scan[i]; - vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); val = vl_vlc_sbits(&bs->vlc, 8); @@ -1093,7 +1072,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * vl_vlc_dumpbits(&bs->vlc, 8); val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; } - val = (val * quantizer_scale * quant_matrix[j]) / 16; + val = (val * quantizer_scale * quant_matrix[i]) / 16; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; @@ -1135,11 +1114,9 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * } static inline void -get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, - const int scan[64], int quantizer_scale, short *dest) +get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) { - int i, j, val; - uint8_t *quant_matrix = picture->non_intra_quantizer_matrix; + int i, val; const DCTtab * tab; i = -1; @@ -1162,10 +1139,9 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des break; /* end of block */ normal_code: - j = scan[i]; bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5; /* oddification */ val = (val - 1) | 1; @@ -1198,8 +1174,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */ - j = scan[i]; - vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); val = vl_vlc_sbits(&bs->vlc, 8); @@ -1208,7 +1182,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; } val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; + val = (val * quantizer_scale * quant_matrix[i]) / 32; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; @@ -1250,7 +1224,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_des } static inline void -slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc, +slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc, unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3]) { short dest[64]; @@ -1272,11 +1246,11 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision); if (picture->mpeg1) { if (picture->picture_coding_type != D_TYPE) - get_mpeg1_intra_block(bs, picture, scan, quantizer_scale, dest); + get_mpeg1_intra_block(bs, quant_matrix, quantizer_scale, dest); } else if (picture->intra_vlc_format) - get_intra_block_B15(bs, picture, scan, quantizer_scale, dest); + get_intra_block_B15(bs, quant_matrix, quantizer_scale, dest); else - get_intra_block_B14(bs, picture, scan, quantizer_scale, dest); + get_intra_block_B14(bs, quant_matrix, quantizer_scale, dest); memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); @@ -1286,8 +1260,8 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur } static inline void -slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int scan[64], int cc, - unsigned x, unsigned y, int quantizer_scale, enum pipe_mpeg12_dct_type coding) +slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc, + unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale) { short dest[64]; @@ -1298,9 +1272,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi memset(dest, 0, sizeof(int16_t) * 64); if (picture->mpeg1) - get_mpeg1_non_intra_block(bs, picture, scan, quantizer_scale, dest); + get_mpeg1_non_intra_block(bs, quant_matrix, quantizer_scale, dest); else - get_non_intra_block(bs, picture, scan, quantizer_scale, dest); + get_non_intra_block(bs, quant_matrix, quantizer_scale, dest); memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); @@ -1619,7 +1593,8 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, } static inline bool -decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, const int scan[64]) +decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, + const int intra_quantizer_matrix[64], const int non_intra_quantizer_matrix[64]) { enum pipe_video_field_select default_field_select; struct pipe_motionvector mv_fwd, mv_bwd; @@ -1706,12 +1681,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; // unravaled loop of 6 block(i) calls in macroblock() - slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, scan, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, scan, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); if (picture->picture_coding_type == D_TYPE) { vl_vlc_needbits(&bs->vlc); @@ -1769,17 +1744,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, c // TODO optimize not fully used for idct accel only mc. if (coded_block_pattern & 0x20) - slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+0, quantizer_scale, dct_type); // cc0 luma 0 + slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0 if (coded_block_pattern & 0x10) - slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+0, quantizer_scale, dct_type); // cc0 luma 1 + slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1 if (coded_block_pattern & 0x08) - slice_non_intra_DCT(bs, picture, scan, 0, x*2+0, y*2+1, quantizer_scale, dct_type); // cc0 luma 2 + slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2 if (coded_block_pattern & 0x04) - slice_non_intra_DCT(bs, picture, scan, 0, x*2+1, y*2+1, quantizer_scale, dct_type); // cc0 luma 3 + slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3 if (coded_block_pattern & 0x2) - slice_non_intra_DCT(bs, picture, scan, 1, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc1 croma + slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma if (coded_block_pattern & 0x1) - slice_non_intra_DCT(bs, picture, scan, 2, x, y, quantizer_scale, PIPE_MPEG12_DCT_TYPE_FRAME); // cc2 croma + slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma } dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; @@ -1881,7 +1856,11 @@ void vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]) { + int intra_quantizer_matrix[64]; + int non_intra_quantizer_matrix[64]; + const int *scan; + unsigned i; assert(bs); assert(num_ycbcr_blocks); @@ -1892,6 +1871,10 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe vl_vlc_init(&bs->vlc, buffer, num_bytes); scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; + for (i = 0; i < 64; ++i) { + intra_quantizer_matrix[i] = picture->intra_quantizer_matrix[scan[i]]; + non_intra_quantizer_matrix[i] = picture->non_intra_quantizer_matrix[scan[i]]; + } - while(decode_slice(bs, picture, scan)); + while(decode_slice(bs, picture, intra_quantizer_matrix, non_intra_quantizer_matrix)); } -- 2.7.4