From 31096e13f858daf896c0c53077fb25e92da089a6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Wed, 24 Aug 2011 22:10:42 +0200 Subject: [PATCH] g3dvl: Use a single texture for luma and chroma data MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Younes Manton --- src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 20 ++-- src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h | 5 +- src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 146 ++++++++++++------------- src/gallium/auxiliary/vl/vl_mpeg12_decoder.h | 8 +- src/gallium/auxiliary/vl/vl_vertex_buffers.c | 49 +-------- src/gallium/auxiliary/vl/vl_vertex_buffers.h | 3 +- 6 files changed, 91 insertions(+), 140 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c index 61ffcd1..bc88929 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c @@ -1199,6 +1199,7 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur bs->ycbcr_stream[cc]->y = y; bs->ycbcr_stream[cc]->intra = 1; bs->ycbcr_stream[cc]->coding = coding; + bs->ycbcr_stream[cc]->block_num = bs->block_num++; vl_vlc_needbits(&bs->vlc); @@ -1218,11 +1219,11 @@ slice_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pictur else get_intra_block_B14(bs, quantizer_scale, dest); - memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); + memcpy(bs->ycbcr_buffer, dest, sizeof(int16_t) * 64); bs->num_ycbcr_blocks[cc]++; bs->ycbcr_stream[cc]++; - bs->ycbcr_buffer[cc] += 64; + bs->ycbcr_buffer += 64; } static INLINE void @@ -1235,6 +1236,7 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi bs->ycbcr_stream[cc]->y = y; bs->ycbcr_stream[cc]->intra = 0; bs->ycbcr_stream[cc]->coding = coding; + bs->ycbcr_stream[cc]->block_num = bs->block_num++; memset(dest, 0, sizeof(int16_t) * 64); if (picture->base.profile == PIPE_VIDEO_PROFILE_MPEG1) @@ -1242,11 +1244,11 @@ slice_non_intra_DCT(struct vl_mpg12_bs *bs, struct pipe_mpeg12_picture_desc * pi else get_non_intra_block(bs, quantizer_scale, dest); - memcpy(bs->ycbcr_buffer[cc], dest, sizeof(int16_t) * 64); + memcpy(bs->ycbcr_buffer, dest, sizeof(int16_t) * 64); bs->num_ycbcr_blocks[cc]++; bs->ycbcr_stream[cc]++; - bs->ycbcr_buffer[cc] += 64; + bs->ycbcr_buffer += 64; } static INLINE void @@ -1788,7 +1790,7 @@ vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height) void vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], - short *ycbcr_buffer[VL_MAX_PLANES], struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]) + short *ycbcr_buffer, struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]) { unsigned i; @@ -1796,10 +1798,12 @@ vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_str assert(ycbcr_stream && ycbcr_buffer); assert(mv_stream); - for (i = 0; i < VL_MAX_PLANES; ++i) { + bs->block_num = 0; + + for (i = 0; i < VL_MAX_PLANES; ++i) bs->ycbcr_stream[i] = ycbcr_stream[i]; - bs->ycbcr_buffer[i] = ycbcr_buffer[i]; - } + bs->ycbcr_buffer = ycbcr_buffer; + for (i = 0; i < VL_MAX_REF_FRAMES; ++i) bs->mv_stream[i] = mv_stream[i]; diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h index 8a35dc4..797a7e7 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.h @@ -38,10 +38,11 @@ struct vl_mpg12_bs struct vl_vlc vlc; + unsigned block_num; unsigned *num_ycbcr_blocks; struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; - short *ycbcr_buffer[VL_MAX_PLANES]; + short *ycbcr_buffer; struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]; }; @@ -51,7 +52,7 @@ vl_mpg12_bs_init(struct vl_mpg12_bs *bs, unsigned width, unsigned height); void vl_mpg12_bs_set_buffers(struct vl_mpg12_bs *bs, struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES], - short *ycbcr_buffer[VL_MAX_PLANES], struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]); + short *ycbcr_buffer, struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]); void vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const void *buffer, diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c index 8100f80..a270667 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c @@ -30,6 +30,7 @@ #include #include +#include #include #include "vl_mpeg12_decoder.h" @@ -84,29 +85,35 @@ static const unsigned const_empty_block_mask_420[3][2][2] = { static bool init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer) { - enum pipe_format formats[3]; - - struct pipe_sampler_view **source; + struct pipe_resource *res, res_tmpl; + struct pipe_sampler_view sv_tmpl; struct pipe_surface **destination; unsigned i; assert(dec && buffer); - formats[0] = formats[1] = formats[2] = dec->zscan_source_format; - buffer->zscan_source = vl_video_buffer_create_ex - ( - dec->base.context, - dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT, - align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line, - 1, PIPE_VIDEO_CHROMA_FORMAT_444, formats, PIPE_USAGE_STATIC - ); - - if (!buffer->zscan_source) + memset(&res_tmpl, 0, sizeof(res_tmpl)); + res_tmpl.target = PIPE_TEXTURE_2D; + res_tmpl.format = dec->zscan_source_format; + res_tmpl.width0 = dec->blocks_per_line * BLOCK_WIDTH * BLOCK_HEIGHT; + res_tmpl.height0 = align(dec->num_blocks, dec->blocks_per_line) / dec->blocks_per_line; + res_tmpl.depth0 = 1; + res_tmpl.array_size = 1; + res_tmpl.usage = PIPE_USAGE_STREAM; + res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW; + + res = dec->base.context->screen->resource_create(dec->base.context->screen, &res_tmpl); + if (!res) goto error_source; - source = buffer->zscan_source->get_sampler_view_planes(buffer->zscan_source); - if (!source) + + memset(&sv_tmpl, 0, sizeof(sv_tmpl)); + u_sampler_view_default_template(&sv_tmpl, res, res->format); + sv_tmpl.swizzle_r = sv_tmpl.swizzle_g = sv_tmpl.swizzle_b = sv_tmpl.swizzle_a = PIPE_SWIZZLE_RED; + buffer->zscan_source = dec->base.context->create_sampler_view(dec->base.context, res, &sv_tmpl); + pipe_resource_reference(&res, NULL); + if (!buffer->zscan_source) goto error_sampler; if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) @@ -119,7 +126,7 @@ init_zscan_buffer(struct vl_mpeg12_decoder *dec, struct vl_mpeg12_buffer *buffer for (i = 0; i < VL_MAX_PLANES; ++i) if (!vl_zscan_init_buffer(i == 0 ? &dec->zscan_y : &dec->zscan_c, - &buffer->zscan[i], source[i], destination[i])) + &buffer->zscan[i], buffer->zscan_source, destination[i])) goto error_plane; return true; @@ -130,7 +137,7 @@ error_plane: error_surface: error_sampler: - buffer->zscan_source->destroy(buffer->zscan_source); + pipe_sampler_view_reference(&buffer->zscan_source, NULL); error_source: return false; @@ -145,7 +152,8 @@ cleanup_zscan_buffer(struct vl_mpeg12_buffer *buffer) for (i = 0; i < VL_MAX_PLANES; ++i) vl_zscan_cleanup_buffer(&buffer->zscan[i]); - buffer->zscan_source->destroy(buffer->zscan_source); + + pipe_sampler_view_reference(&buffer->zscan_source, NULL); } static bool @@ -321,8 +329,7 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec, const struct pipe_mpeg12_macroblock *mb) { unsigned intra; - unsigned tb, x, y, luma_blocks; - short *blocks; + unsigned tb, x, y, num_blocks = 0; assert(dec && buf); assert(mb); @@ -330,10 +337,9 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec, if (!mb->coded_block_pattern) return; - blocks = mb->blocks; intra = mb->macroblock_type & PIPE_MPEG12_MB_TYPE_INTRA ? 1 : 0; - for (y = 0, luma_blocks = 0; y < 2; ++y) { + for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) { if (mb->coded_block_pattern & const_empty_block_mask_420[0][y][x]) { @@ -342,21 +348,16 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec, stream->y = mb->y * 2 + y; stream->intra = intra; stream->coding = mb->macroblock_modes.bits.dct_type; + stream->block_num = buf->block_num++; buf->num_ycbcr_blocks[0]++; buf->ycbcr_stream[0]++; - luma_blocks++; + num_blocks++; } } } - if (luma_blocks > 0) { - memcpy(buf->texels[0], blocks, 64 * sizeof(short) * luma_blocks); - buf->texels[0] += 64 * luma_blocks; - blocks += 64 * luma_blocks; - } - /* TODO: Implement 422, 444 */ //assert(ctx->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); @@ -368,15 +369,17 @@ UploadYcbcrBlocks(struct vl_mpeg12_decoder *dec, stream->y = mb->y; stream->intra = intra; stream->coding = 0; + stream->block_num = buf->block_num++; buf->num_ycbcr_blocks[tb]++; buf->ycbcr_stream[tb]++; - memcpy(buf->texels[tb], blocks, 64 * sizeof(short)); - buf->texels[tb] += 64; - blocks += 64; + num_blocks++; } } + + memcpy(buf->texels, mb->blocks, 64 * sizeof(short) * num_blocks); + buf->texels += 64 * num_blocks; } static void @@ -411,7 +414,6 @@ vl_mpeg12_destroy(struct pipe_video_decoder *decoder) pipe_resource_reference(&dec->quads.buffer, NULL); pipe_resource_reference(&dec->pos.buffer, NULL); - pipe_resource_reference(&dec->block_num.buffer, NULL); pipe_sampler_view_reference(&dec->zscan_linear, NULL); pipe_sampler_view_reference(&dec->zscan_normal, NULL); @@ -567,9 +569,11 @@ static void vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder) { struct vl_mpeg12_decoder *dec = (struct vl_mpeg12_decoder *)decoder; - struct vl_mpeg12_buffer *buf; - struct pipe_sampler_view **sampler_views; + + struct pipe_resource *tex; + struct pipe_box rect = { 0, 0, 0, 1, 1, 1 }; + unsigned i; assert(dec); @@ -587,35 +591,25 @@ vl_mpeg12_begin_frame(struct pipe_video_decoder *decoder) vl_vb_map(&buf->vertex_stream, dec->base.context); - sampler_views = buf->zscan_source->get_sampler_view_planes(buf->zscan_source); + tex = buf->zscan_source->texture; + rect.width = tex->width0; + rect.height = tex->height0; + + buf->tex_transfer = dec->base.context->get_transfer + ( + dec->base.context, tex, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, + &rect + ); - assert(sampler_views); + buf->block_num = 0; + buf->texels = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer); for (i = 0; i < VL_MAX_PLANES; ++i) { - struct pipe_resource *tex = sampler_views[i]->texture; - struct pipe_box rect = - { - 0, 0, 0, - tex->width0, - tex->height0, - 1 - }; - - buf->tex_transfer[i] = dec->base.context->get_transfer - ( - dec->base.context, tex, - 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &rect - ); - - buf->texels[i] = dec->base.context->transfer_map(dec->base.context, buf->tex_transfer[i]); - + buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); buf->num_ycbcr_blocks[i] = 0; } - for (i = 0; i < VL_MAX_PLANES; ++i) - buf->ycbcr_stream[i] = vl_vb_get_ycbcr_stream(&buf->vertex_stream, i); - for (i = 0; i < VL_MAX_REF_FRAMES; ++i) buf->mv_stream[i] = vl_vb_get_mv_stream(&buf->vertex_stream, i); @@ -734,10 +728,8 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) vl_vb_unmap(&buf->vertex_stream, dec->base.context); - for (i = 0; i < VL_MAX_PLANES; ++i) { - dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer[i]); - dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer[i]); - } + dec->base.context->transfer_unmap(dec->base.context, buf->tex_transfer); + dec->base.context->transfer_destroy(dec->base.context, buf->tex_transfer); vb[0] = dec->quads; vb[1] = dec->pos; @@ -758,14 +750,12 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) } } - vb[2] = dec->block_num; - dec->base.context->bind_vertex_elements_state(dec->base.context, dec->ves_ycbcr); for (i = 0; i < VL_MAX_PLANES; ++i) { if (!buf->num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, i); - dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + dec->base.context->set_vertex_buffers(dec->base.context, 2, vb); vl_zscan_render(&buf->zscan[i] , buf->num_ycbcr_blocks[i]); @@ -782,7 +772,7 @@ vl_mpeg12_end_frame(struct pipe_video_decoder *decoder) if (!buf->num_ycbcr_blocks[i]) continue; vb[1] = vl_vb_get_ycbcr(&buf->vertex_stream, component); - dec->base.context->set_vertex_buffers(dec->base.context, 3, vb); + dec->base.context->set_vertex_buffers(dec->base.context, 2, vb); if (dec->base.entrypoint <= PIPE_VIDEO_ENTRYPOINT_IDCT) vl_idct_prepare_stage2(&buf->idct[component]); @@ -1085,31 +1075,33 @@ vl_create_mpeg12_decoder(struct pipe_context *context, dec->num_blocks = (dec->base.width * dec->base.height) / block_size_pixels; dec->width_in_macroblocks = align(dec->base.width, MACROBLOCK_WIDTH) / MACROBLOCK_WIDTH; - dec->quads = vl_vb_upload_quads(dec->base.context); - dec->pos = vl_vb_upload_pos( - dec->base.context, - dec->base.width / MACROBLOCK_WIDTH, - dec->base.height / MACROBLOCK_HEIGHT - ); - dec->block_num = vl_vb_upload_block_num(dec->base.context, dec->num_blocks); - - dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); - dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); - /* TODO: Implement 422, 444 */ assert(dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) { dec->chroma_width = dec->base.width / 2; dec->chroma_height = dec->base.height / 2; + dec->num_blocks = dec->num_blocks * 2; } else if (dec->base.chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422) { dec->chroma_width = dec->base.width; dec->chroma_height = dec->base.height / 2; + dec->num_blocks = dec->num_blocks * 2 + dec->num_blocks; } else { dec->chroma_width = dec->base.width; dec->chroma_height = dec->base.height; + dec->num_blocks = dec->num_blocks * 3; } + dec->quads = vl_vb_upload_quads(dec->base.context); + dec->pos = vl_vb_upload_pos( + dec->base.context, + dec->base.width / MACROBLOCK_WIDTH, + dec->base.height / MACROBLOCK_HEIGHT + ); + + dec->ves_ycbcr = vl_vb_get_ves_ycbcr(dec->base.context); + dec->ves_mv = vl_vb_get_ves_mv(dec->base.context); + switch (entrypoint) { case PIPE_VIDEO_ENTRYPOINT_BITSTREAM: format_config = find_format_config(dec, bitstream_format_config, num_bitstream_format_configs); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h index 277f5b9..4a8d653 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h +++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.h @@ -55,7 +55,6 @@ struct vl_mpeg12_decoder struct pipe_vertex_buffer quads; struct pipe_vertex_buffer pos; - struct pipe_vertex_buffer block_num; void *ves_ycbcr; void *ves_mv; @@ -87,17 +86,18 @@ struct vl_mpeg12_buffer { struct vl_vertex_buffer vertex_stream; + unsigned block_num; unsigned num_ycbcr_blocks[3]; - struct pipe_video_buffer *zscan_source; + struct pipe_sampler_view *zscan_source; struct vl_mpg12_bs bs; struct vl_zscan_buffer zscan[VL_MAX_PLANES]; struct vl_idct_buffer idct[VL_MAX_PLANES]; struct vl_mc_buffer mc[VL_MAX_PLANES]; - struct pipe_transfer *tex_transfer[VL_MAX_PLANES]; - short *texels[VL_MAX_PLANES]; + struct pipe_transfer *tex_transfer; + short *texels; struct vl_ycbcr_block *ycbcr_stream[VL_MAX_PLANES]; struct vl_motionvector *mv_stream[VL_MAX_REF_FRAMES]; diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.c b/src/gallium/auxiliary/vl/vl_vertex_buffers.c index b7aa14b..281db80 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.c +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.c @@ -125,49 +125,6 @@ vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height) return pos; } -struct pipe_vertex_buffer -vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks) -{ - struct pipe_vertex_buffer buf; - struct pipe_transfer *buf_transfer; - struct vertex2s *v; - unsigned i; - - assert(pipe); - - /* create buffer */ - buf.stride = sizeof(struct vertex2s); - buf.buffer_offset = 0; - buf.buffer = pipe_buffer_create - ( - pipe->screen, - PIPE_BIND_VERTEX_BUFFER, - PIPE_USAGE_STATIC, - sizeof(struct vertex2s) * num_blocks - ); - - if(!buf.buffer) - return buf; - - /* and fill it */ - v = pipe_buffer_map - ( - pipe, - buf.buffer, - PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, - &buf_transfer - ); - - for ( i = 0; i < num_blocks; ++i, ++v) { - v->x = i; - v->y = i; - } - - pipe_buffer_unmap(pipe, buf_transfer); - - return buf; -} - static struct pipe_vertex_element vl_vb_get_quad_vertex_element(void) { @@ -211,12 +168,10 @@ vl_vb_get_ves_ycbcr(struct pipe_context *pipe) /* Position element */ vertex_elems[VS_I_VPOS].src_format = PIPE_FORMAT_R8G8B8A8_USCALED; - vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 1, 1); - /* block num element */ - vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R16G16_SSCALED; + vertex_elems[VS_I_BLOCK_NUM].src_format = PIPE_FORMAT_R32_FLOAT; - vl_vb_element_helper(&vertex_elems[VS_I_BLOCK_NUM], 1, 2); + vl_vb_element_helper(&vertex_elems[VS_I_VPOS], 2, 1); return pipe->create_vertex_elements_state(pipe, 3, vertex_elems); } diff --git a/src/gallium/auxiliary/vl/vl_vertex_buffers.h b/src/gallium/auxiliary/vl/vl_vertex_buffers.h index 38db899..874ecce 100644 --- a/src/gallium/auxiliary/vl/vl_vertex_buffers.h +++ b/src/gallium/auxiliary/vl/vl_vertex_buffers.h @@ -85,6 +85,7 @@ struct vl_ycbcr_block uint8_t x, y; uint8_t intra; uint8_t coding; + float block_num; }; struct vl_vertex_buffer @@ -108,8 +109,6 @@ struct pipe_vertex_buffer vl_vb_upload_quads(struct pipe_context *pipe); struct pipe_vertex_buffer vl_vb_upload_pos(struct pipe_context *pipe, unsigned width, unsigned height); -struct pipe_vertex_buffer vl_vb_upload_block_num(struct pipe_context *pipe, unsigned num_blocks); - void *vl_vb_get_ves_ycbcr(struct pipe_context *pipe); void *vl_vb_get_ves_mv(struct pipe_context *pipe); -- 2.7.4