From 912dc8ff09cd7c28926762c2e562de5a99d3e27a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Sun, 29 May 2011 19:53:45 +0200 Subject: [PATCH] [g3dvl] move quantification into shaders --- src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 107 +++++++------------ src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 23 ++++- src/gallium/auxiliary/vl/vl_zscan.c | 138 +++++++++++++++++++------ src/gallium/auxiliary/vl/vl_zscan.h | 12 +-- 4 files changed, 168 insertions(+), 112 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index bf9b6cd..e7fbc31 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -55,7 +55,6 @@ #include #include "vl_vlc.h" -#include "vl_zscan.h" #include "vl_mpeg12_bitstream.h" /* take num bits from the high part of bit_buf and zero extend them */ @@ -64,12 +63,6 @@ /* take num bits from the high part of bit_buf and sign extend them */ #define SBITS(buf,num) (((int32_t)(buf)) >> (32 - (num))) -#define SATURATE(val) \ -do { \ - if ((uint32_t)(val + 2048) > 4095) \ - val = (val > 0) ? 2047 : -2048; \ -} while (0) - /* macroblock modes */ #define MACROBLOCK_INTRA 1 #define MACROBLOCK_PATTERN 2 @@ -721,7 +714,7 @@ get_chroma_dc_dct_diff(struct vl_mpg12_bs *bs) } static inline void -get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) +get_intra_block_B14(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) { int i, val; const DCTtab *tab; @@ -742,12 +735,10 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan normal_code: bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + val = tab->level * quantizer_scale; - /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - SATURATE (val); dest[i] = val; bs->vlc.buf <<= 1; @@ -771,9 +762,8 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); - val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16; + val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; - SATURATE (val); dest[i] = val; vl_vlc_dumpbits(&bs->vlc, 12); @@ -811,7 +801,7 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan } static inline void -get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) +get_intra_block_B15(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) { int i, val; const DCTtab * tab; @@ -831,12 +821,10 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan normal_code: bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + val = tab->level * quantizer_scale; - /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - SATURATE (val); dest[i] = val; bs->vlc.buf <<= 1; @@ -859,9 +847,8 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); - val = (vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale * quant_matrix[i]) / 16; + val = vl_vlc_sbits(&bs->vlc, 12) * quantizer_scale; - SATURATE (val); dest[i] = val; vl_vlc_dumpbits(&bs->vlc, 12); @@ -900,7 +887,7 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan } static inline void -get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) +get_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) { int i, val; const DCTtab *tab; @@ -927,12 +914,10 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan normal_code: bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5; + val = ((2*tab->level+1) * quantizer_scale) >> 1; - /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - SATURATE (val); dest[i] = val; bs->vlc.buf <<= 1; @@ -960,9 +945,8 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan vl_vlc_dumpbits(&bs->vlc, 12); vl_vlc_needbits(&bs->vlc); val = 2 * (vl_vlc_sbits(&bs->vlc, 12) + vl_vlc_sbits(&bs->vlc, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[i]) / 32; + val = (val * quantizer_scale) / 2; - SATURATE (val); dest[i] = val; vl_vlc_dumpbits(&bs->vlc, 12); @@ -999,7 +983,7 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan } static inline void -get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) +get_mpeg1_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) { int i, val; const DCTtab * tab; @@ -1020,7 +1004,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu normal_code: bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4; + val = tab->level * quantizer_scale; /* oddification */ val = (val - 1) | 1; @@ -1028,7 +1012,6 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - SATURATE (val); dest[i] = val; bs->vlc.buf <<= 1; @@ -1057,12 +1040,11 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu vl_vlc_dumpbits(&bs->vlc, 8); val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; } - val = (val * quantizer_scale * quant_matrix[i]) / 16; + val = val * quantizer_scale; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; - SATURATE (val); dest[i] = val; vl_vlc_dumpbits(&bs->vlc, 8); @@ -1099,7 +1081,7 @@ get_mpeg1_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int qu } static inline void -get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest) +get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, int quantizer_scale, short *dest) { int i, val; const DCTtab * tab; @@ -1126,7 +1108,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in normal_code: bs->vlc.buf <<= tab->len; bs->vlc.bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[i]) >> 5; + val = ((2*tab->level+1) * quantizer_scale) >> 1; /* oddification */ val = (val - 1) | 1; @@ -1134,7 +1116,6 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in /* if (bitstream_get (1)) val = -val; */ val = (val ^ vl_vlc_sbits(&bs->vlc, 1)) - vl_vlc_sbits(&bs->vlc, 1); - SATURATE (val); dest[i] = val; bs->vlc.buf <<= 1; @@ -1167,12 +1148,11 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in val = vl_vlc_ubits(&bs->vlc, 8) + 2 * val; } val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[i]) / 32; + val = (val * quantizer_scale) / 2; /* oddification */ val = (val + ~SBITS (val, 1)) | 1; - SATURATE (val); dest[i] = val; vl_vlc_dumpbits(&bs->vlc, 8); @@ -1209,7 +1189,7 @@ get_mpeg1_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], in } static inline void -slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc, +slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale, int dc_dct_pred[3]) { short dest[64]; @@ -1228,14 +1208,14 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur dc_dct_pred[cc] += get_chroma_dc_dct_diff(bs); memset(dest, 0, sizeof(int16_t) * 64); - dest[0] = dc_dct_pred[cc] << (3 - picture->intra_dc_precision); + dest[0] = dc_dct_pred[cc]; if (picture->mpeg1) { if (picture->picture_coding_type != D_TYPE) - get_mpeg1_intra_block(bs, quant_matrix, quantizer_scale, dest); + get_mpeg1_intra_block(bs, quantizer_scale, dest); } else if (picture->intra_vlc_format) - get_intra_block_B15(bs, quant_matrix, quantizer_scale, dest); + get_intra_block_B15(bs, quantizer_scale, dest); else - get_intra_block_B14(bs, quant_matrix, quantizer_scale, dest); + get_intra_block_B14(bs, quantizer_scale, dest); memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); @@ -1245,7 +1225,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur } static inline void -slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, const int quant_matrix[64], int cc, +slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, int cc, unsigned x, unsigned y, enum pipe_mpeg12_dct_type coding, int quantizer_scale) { short dest[64]; @@ -1257,9 +1237,9 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi memset(dest, 0, sizeof(int16_t) * 64); if (picture->mpeg1) - get_mpeg1_non_intra_block(bs, quant_matrix, quantizer_scale, dest); + get_mpeg1_non_intra_block(bs, quantizer_scale, dest); else - get_non_intra_block(bs, quant_matrix, quantizer_scale, dest); + get_non_intra_block(bs, quantizer_scale, dest); memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); @@ -1571,8 +1551,7 @@ slice_init(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * picture, } static inline bool -decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, - const int intra_quantizer_matrix[64], const int non_intra_quantizer_matrix[64]) +decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture) { enum pipe_video_field_select default_field_select; struct pipe_motionvector mv_fwd, mv_bwd; @@ -1659,12 +1638,12 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, mv_bwd.top.weight = mv_bwd.bottom.weight = PIPE_VIDEO_MV_WEIGHT_MIN; // unravaled loop of 6 block(i) calls in macroblock() - slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); - slice_intra_DCT(bs, picture, intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); + slice_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale, dc_dct_pred); if (picture->picture_coding_type == D_TYPE) { vl_vlc_needbits(&bs->vlc); @@ -1722,17 +1701,17 @@ decode_slice(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc *picture, // TODO optimize not fully used for idct accel only mc. if (coded_block_pattern & 0x20) - slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0 + slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+0, dct_type, quantizer_scale); // cc0 luma 0 if (coded_block_pattern & 0x10) - slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1 + slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+0, dct_type, quantizer_scale); // cc0 luma 1 if (coded_block_pattern & 0x08) - slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2 + slice_non_intra_DCT(bs, picture, 0, x*2+0, y*2+1, dct_type, quantizer_scale); // cc0 luma 2 if (coded_block_pattern & 0x04) - slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3 + slice_non_intra_DCT(bs, picture, 0, x*2+1, y*2+1, dct_type, quantizer_scale); // cc0 luma 3 if (coded_block_pattern & 0x2) - slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma + slice_non_intra_DCT(bs, picture, 1, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc1 croma if (coded_block_pattern & 0x1) - slice_non_intra_DCT(bs, picture, non_intra_quantizer_matrix, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma + slice_non_intra_DCT(bs, picture, 2, x, y, PIPE_MPEG12_DCT_TYPE_FRAME, quantizer_scale); // cc2 croma } dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0; @@ -1845,12 +1824,6 @@ void vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, struct pipe_mpeg12_picture_desc *picture, unsigned num_ycbcr_blocks[3]) { - int intra_quantizer_matrix[64]; - int non_intra_quantizer_matrix[64]; - - const int *scan; - unsigned i; - assert(bs); assert(num_ycbcr_blocks); assert(buffer && num_bytes); @@ -1859,11 +1832,5 @@ vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffe vl_vlc_init(&bs->vlc, buffer, num_bytes); - scan = picture->alternate_scan ? vl_zscan_alternate : vl_zscan_normal; - for (i = 0; i < 64; ++i) { - intra_quantizer_matrix[i] = picture->intra_quantizer_matrix[scan[i]]; - non_intra_quantizer_matrix[i] = picture->non_intra_quantizer_matrix[scan[i]]; - } - - while(decode_slice(bs, picture, intra_quantizer_matrix, non_intra_quantizer_matrix)); + while(decode_slice(bs, picture)); } diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index f96d7f0..ca790e7 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -312,8 +312,21 @@ vl_mpeg12_buffer_map(struct pipe_video_decode_buffer *buffer) vl_mpg12_bs_set_buffers(&buf->bs, ycbcr_stream, buf->texels, mv_stream); } else { - for (i = 0; i < VL_MAX_PLANES; ++i) + static const uint8_t dummy_quant[64] = { + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 + }; + + for (i = 0; i < VL_MAX_PLANES; ++i) { vl_zscan_set_layout(&buf->zscan[i], dec->zscan_linear); + vl_zscan_upload_quant(&buf->zscan[i], dummy_quant, dummy_quant); + } } } @@ -365,6 +378,7 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer, unsigned num_ycbcr_blocks[3]) { struct vl_mpeg12_buffer *buf = (struct vl_mpeg12_buffer*)buffer; + uint8_t intra_quantizer_matrix[64]; struct vl_mpeg12_decoder *dec; unsigned i; @@ -373,8 +387,13 @@ vl_mpeg12_buffer_decode_bitstream(struct pipe_video_decode_buffer *buffer, dec = (struct vl_mpeg12_decoder *)buf->base.decoder; assert(dec); - for (i = 0; i < VL_MAX_PLANES; ++i) + memcpy(intra_quantizer_matrix, picture->intra_quantizer_matrix, sizeof(intra_quantizer_matrix)); + intra_quantizer_matrix[0] = 1 << (7 - picture->intra_dc_precision); + + for (i = 0; i < VL_MAX_PLANES; ++i) { vl_zscan_set_layout(&buf->zscan[i], picture->alternate_scan ? dec->zscan_alternate : dec->zscan_normal); + vl_zscan_upload_quant(&buf->zscan[i], intra_quantizer_matrix, picture->non_intra_quantizer_matrix); + } vl_mpg12_bs_decode(&buf->bs, num_bytes, data, picture, num_ycbcr_blocks); } diff --git a/src/gallium/auxiliary/vl/vl_zscan.c b/src/gallium/auxiliary/vl/vl_zscan.c index 0f468df..4af3962 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.c +++ b/src/gallium/auxiliary/vl/vl_zscan.c @@ -136,11 +136,11 @@ create_vert_shader(struct vl_zscan *zscan) ureg_MUL(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), ureg_src(tmp), scale); ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f)); - ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XZ), ureg_scalar(instance, TGSI_SWIZZLE_X), + ureg_MUL(shader, ureg_writemask(tmp, TGSI_WRITEMASK_XW), ureg_scalar(instance, TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f / zscan->blocks_per_line)); ureg_FRC(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); - ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_src(tmp)); + ureg_FLR(shader, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_src(tmp)); for (i = 0; i < zscan->num_channels; ++i) { ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y), @@ -149,7 +149,8 @@ create_vert_shader(struct vl_zscan *zscan) ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect, ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp)); ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Y), vrect); - ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), ureg_src(tmp), + ureg_MOV(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_Z), vpos); + ureg_MUL(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_W), ureg_src(tmp), ureg_imm1f(shader, (float)zscan->blocks_per_line / zscan->blocks_total)); } @@ -165,10 +166,10 @@ create_frag_shader(struct vl_zscan *zscan) struct ureg_program *shader; struct ureg_src vtex[zscan->num_channels]; - struct ureg_src src, scan, quant; + struct ureg_src samp_src, samp_scan, samp_quant; struct ureg_dst tmp[zscan->num_channels]; - struct ureg_dst fragment; + struct ureg_dst quant, fragment; unsigned i; @@ -179,12 +180,13 @@ create_frag_shader(struct vl_zscan *zscan) for (i = 0; i < zscan->num_channels; ++i) vtex[i] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX + i, TGSI_INTERPOLATE_LINEAR); - src = ureg_DECL_sampler(shader, 0); - scan = ureg_DECL_sampler(shader, 1); - quant = ureg_DECL_sampler(shader, 2); + samp_src = ureg_DECL_sampler(shader, 0); + samp_scan = ureg_DECL_sampler(shader, 1); + samp_quant = ureg_DECL_sampler(shader, 2); for (i = 0; i < zscan->num_channels; ++i) tmp[i] = ureg_DECL_temporary(shader); + quant = ureg_DECL_temporary(shader); fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); @@ -194,17 +196,18 @@ create_frag_shader(struct vl_zscan *zscan) * fragment = tex(tmp, 0) * quant */ for (i = 0; i < zscan->num_channels; ++i) - ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], scan); + ureg_TEX(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_X), TGSI_TEXTURE_2D, vtex[i], samp_scan); for (i = 0; i < zscan->num_channels; ++i) - ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_Z)); + ureg_MOV(shader, ureg_writemask(tmp[i], TGSI_WRITEMASK_Y), ureg_scalar(vtex[i], TGSI_SWIZZLE_W)); - for (i = 0; i < zscan->num_channels; ++i) - ureg_TEX(shader, tmp[i], TGSI_TEXTURE_2D, ureg_src(tmp[i]), src); + for (i = 0; i < zscan->num_channels; ++i) { + ureg_TEX(shader, ureg_writemask(tmp[0], TGSI_WRITEMASK_X << i), TGSI_TEXTURE_2D, ureg_src(tmp[i]), samp_src); + ureg_TEX(shader, ureg_writemask(quant, TGSI_WRITEMASK_X << i), TGSI_TEXTURE_3D, vtex[i], samp_quant); + } - // TODO: Fetch quant and use it - for (i = 0; i < zscan->num_channels; ++i) - ureg_MUL(shader, ureg_writemask(fragment, TGSI_WRITEMASK_X << i), ureg_src(tmp[i]), ureg_imm1f(shader, 1.0f)); + ureg_MUL(shader, quant, ureg_src(quant), ureg_imm1f(shader, 16.0f)); + ureg_MUL(shader, fragment, ureg_src(tmp[0]), ureg_src(quant)); for (i = 0; i < zscan->num_channels; ++i) ureg_release_temporary(shader, tmp[i]); @@ -283,7 +286,7 @@ init_state(struct vl_zscan *zscan) memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; - sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; @@ -413,15 +416,6 @@ error_resource: return NULL; } -#if 0 -// TODO -struct pipe_sampler_view * -vl_zscan_normal(struct pipe_context *pipe, unsigned blocks_per_line); - -struct pipe_sampler_view * -vl_zscan_alternate(struct pipe_context *pipe, unsigned blocks_per_line); -#endif - bool vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, unsigned buffer_width, unsigned buffer_height, @@ -457,16 +451,13 @@ vl_zscan_cleanup(struct vl_zscan *zscan) cleanup_state(zscan); } -#if 0 -// TODO -void -vl_zscan_upload_quant(struct vl_zscan *zscan, ...); -#endif - bool vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, struct pipe_sampler_view *src, struct pipe_surface *dst) { + struct pipe_resource res_tmpl, *res; + struct pipe_sampler_view sv_tmpl; + assert(zscan && buffer); memset(buffer, 0, sizeof(struct vl_zscan_buffer)); @@ -489,6 +480,28 @@ vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, buffer->fb_state.nr_cbufs = 1; pipe_surface_reference(&buffer->fb_state.cbufs[0], dst); + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_3D; + res_tmpl.format = PIPE_FORMAT_R8_UNORM; + res_tmpl.width0 = BLOCK_WIDTH * zscan->blocks_per_line; + res_tmpl.height0 = BLOCK_HEIGHT; + res_tmpl.depth0 = 2; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_IMMUTABLE; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; + + res = zscan->pipe->screen->resource_create(zscan->pipe->screen, &res_tmpl); + if (!res) + return false; + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = TGSI_SWIZZLE_X; + buffer->quant = zscan->pipe->create_sampler_view(zscan->pipe, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!buffer->quant) + return false; + return true; } @@ -513,6 +526,65 @@ vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *la } void +vl_zscan_upload_quant(struct vl_zscan_buffer *buffer, + const uint8_t intra_matrix[64], + const uint8_t non_intra_matrix[64]) +{ + struct pipe_context *pipe; + struct pipe_transfer *buf_transfer; + unsigned x, y, i, pitch; + uint8_t *intra, *non_intra; + + struct pipe_box rect = + { + 0, 0, 0, + BLOCK_WIDTH, + BLOCK_HEIGHT, + 2 + }; + + assert(buffer); + assert(intra_matrix); + assert(non_intra_matrix); + + pipe = buffer->zscan->pipe; + + rect.width *= buffer->zscan->blocks_per_line; + + buf_transfer = pipe->get_transfer + ( + pipe, buffer->quant->texture, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); + if (!buf_transfer) + goto error_transfer; + + pitch = buf_transfer->stride; + + non_intra = pipe->transfer_map(pipe, buf_transfer); + if (!non_intra) + goto error_map; + + intra = non_intra + BLOCK_HEIGHT * pitch; + + for (i = 0; i < buffer->zscan->blocks_per_line; ++i) + for (y = 0; y < BLOCK_HEIGHT; ++y) + for (x = 0; x < BLOCK_WIDTH; ++x) { + intra[i * BLOCK_WIDTH + y * pitch + x] = intra_matrix[x + y * BLOCK_WIDTH]; + non_intra[i * BLOCK_WIDTH + y * pitch + x] = non_intra_matrix[x + y * BLOCK_WIDTH]; + } + + pipe->transfer_unmap(pipe, buf_transfer); + +error_map: + pipe->transfer_destroy(pipe, buf_transfer); + +error_transfer: + return; +} + +void vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances) { struct vl_zscan *zscan; @@ -523,10 +595,10 @@ vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances) zscan->pipe->bind_rasterizer_state(zscan->pipe, zscan->rs_state); zscan->pipe->bind_blend_state(zscan->pipe, zscan->blend); - zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 2, zscan->samplers); + zscan->pipe->bind_fragment_sampler_states(zscan->pipe, 3, zscan->samplers); zscan->pipe->set_framebuffer_state(zscan->pipe, &buffer->fb_state); zscan->pipe->set_viewport_state(zscan->pipe, &buffer->viewport); - zscan->pipe->set_fragment_sampler_views(zscan->pipe, 2, &buffer->src); + zscan->pipe->set_fragment_sampler_views(zscan->pipe, 3, &buffer->src); zscan->pipe->bind_vs_state(zscan->pipe, zscan->vs); zscan->pipe->bind_fs_state(zscan->pipe, zscan->fs); util_draw_arrays_instanced(zscan->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances); diff --git a/src/gallium/auxiliary/vl/vl_zscan.h b/src/gallium/auxiliary/vl/vl_zscan.h index ccc6bc4..be12b8e 100644 --- a/src/gallium/auxiliary/vl/vl_zscan.h +++ b/src/gallium/auxiliary/vl/vl_zscan.h @@ -53,8 +53,6 @@ struct vl_zscan void *samplers[3]; void *vs, *fs; - - struct pipe_sampler_view *quant; }; struct vl_zscan_buffer @@ -84,11 +82,6 @@ vl_zscan_init(struct vl_zscan *zscan, struct pipe_context *pipe, void vl_zscan_cleanup(struct vl_zscan *zscan); -#if 0 -void -vl_zscan_upload_quant(struct vl_zscan *zscan, ...); -#endif - bool vl_zscan_init_buffer(struct vl_zscan *zscan, struct vl_zscan_buffer *buffer, struct pipe_sampler_view *src, struct pipe_surface *dst); @@ -100,6 +93,11 @@ void vl_zscan_set_layout(struct vl_zscan_buffer *buffer, struct pipe_sampler_view *layout); void +vl_zscan_upload_quant(struct vl_zscan_buffer *buffer, + const uint8_t intra_matrix[64], + const uint8_t non_intra_matrix[64]); + +void vl_zscan_render(struct vl_zscan_buffer *buffer, unsigned num_instances); #endif -- 2.7.4