From ae56a1dd67040dc5d53f4a1622f775462f0fec05 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Christian=20K=C3=B6nig?=
Date: Thu, 26 May 2011 01:50:44 +0200
Subject: [PATCH] [g3dvl] implement mismatch control inside idct shaders

Stop tracking the mismatch bit while parsing coefficients in
vl_mpeg12_bitstream.c and apply mismatch control on the GPU instead:
vl_idct_flush() now draws one point per instance with the new
vs_mismatch/fs_mismatch shaders into a surface created on the IDCT
source texture before running the first IDCT stage.

To let that pass sample the source through sampler 0, the source/matrix
and intermediate/transpose sampler view slots are swapped, and
vl_idct_init_buffer() no longer takes a destination surface.

On failure init_shaders() unwinds in reverse creation order: error_vs
releases fs_mismatch and error_fs_mismatch releases vs_mismatch.

---
 src/gallium/auxiliary/vl/vl_idct.c             |  210 ++++++++++++++++++++++---
 src/gallium/auxiliary/vl/vl_idct.h             |   11 +-
 src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c |   15 --
 src/gallium/auxiliary/vl/vl_mpeg12_decoder.c   |    8 +-
 4 files changed, 198 insertions(+), 46 deletions(-)

diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c
index 602258e..4518049 100644
--- a/src/gallium/auxiliary/vl/vl_idct.c
+++ b/src/gallium/auxiliary/vl/vl_idct.c
@@ -139,6 +139,121 @@ matrix_mul(struct ureg_program *shader, struct ureg_dst dst, struct ureg_dst l[2
 }
 
 static void *
+create_mismatch_vert_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+   struct ureg_src vrect, vpos;
+   struct ureg_src scale;
+   struct ureg_dst t_tex;
+   struct ureg_dst o_vpos, o_addr[2];
+
+   shader = ureg_create(TGSI_PROCESSOR_VERTEX);
+   if (!shader)
+      return NULL;
+
+   vrect = ureg_DECL_vs_input(shader, VS_I_RECT);
+   vpos = ureg_DECL_vs_input(shader, VS_I_VPOS);
+
+   t_tex = ureg_DECL_temporary(shader);
+
+   o_vpos = ureg_DECL_output(shader, TGSI_SEMANTIC_POSITION, VS_O_VPOS);
+
+   o_addr[0] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0);
+   o_addr[1] = ureg_DECL_output(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1);
+
+   /*
+    * scale = (BLOCK_WIDTH, BLOCK_HEIGHT) / (buffer_width, buffer_height)
+    *
+    * o_vpos.xy = vpos * scale + scale
+    * o_vpos.zw = 1
+    *
+    * t_tex.xy = vpos * scale
+    * o_addr = calc_addr(t_tex, ...)
+    */
+
+   scale = ureg_imm2f(shader,
+      (float)BLOCK_WIDTH / idct->buffer_width,
+      (float)BLOCK_HEIGHT / idct->buffer_height);
+
+   ureg_MAD(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_XY), vpos, scale, scale);
+   ureg_MOV(shader, ureg_writemask(o_vpos, TGSI_WRITEMASK_ZW), ureg_imm1f(shader, 1.0f));
+
+   ureg_MUL(shader, ureg_writemask(t_tex, TGSI_WRITEMASK_XY), vpos, scale);
+   calc_addr(shader, o_addr, ureg_src(t_tex), ureg_src(t_tex), false, false, idct->buffer_width / 4);
+
+   ureg_release_temporary(shader, t_tex);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
+create_mismatch_frag_shader(struct vl_idct *idct)
+{
+   struct ureg_program *shader;
+
+   struct ureg_src addr[2];
+
+   struct ureg_dst m[8][2];
+   struct ureg_dst fragment;
+
+   unsigned i;
+
+   shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   addr[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR0, TGSI_INTERPOLATE_LINEAR);
+   addr[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_L_ADDR1, TGSI_INTERPOLATE_LINEAR);
+
+   fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   for (i = 0; i < 8; ++i) {
+      m[i][0] = ureg_DECL_temporary(shader);
+      m[i][1] = ureg_DECL_temporary(shader);
+   }
+
+   for (i = 0; i < 8; ++i) {
+      increment_addr(shader, m[i], addr, false, false, i, idct->buffer_height);
+   }
+
+   for (i = 0; i < 8; ++i) {
+      struct ureg_src s_addr[2] = { ureg_src(m[i][0]), ureg_src(m[i][1]) };
+      fetch_four(shader, m[i], s_addr, ureg_DECL_sampler(shader, 0), false);
+   }
+
+   for (i = 1; i < 8; ++i) {
+      ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[i][0]));
+      ureg_ADD(shader, m[0][1], ureg_src(m[0][1]), ureg_src(m[i][1]));
+   }
+
+   ureg_ADD(shader, m[0][0], ureg_src(m[0][0]), ureg_src(m[0][1]));
+   ureg_DP4(shader, m[0][0], ureg_abs(ureg_src(m[0][0])), ureg_imm1f(shader, 1 << 14));
+
+   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_abs(ureg_src(m[7][1])), ureg_imm1f(shader, 1 << 14));
+   ureg_FRC(shader, m[0][0], ureg_src(m[0][0]));
+   ureg_SGT(shader, m[0][0], ureg_imm1f(shader, 0.5f), ureg_abs(ureg_src(m[0][0])));
+
+   ureg_CMP(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_negate(ureg_src(m[0][0])),
+            ureg_imm1f(shader, 1.0f / (1 << 15)), ureg_imm1f(shader, -1.0f / (1 << 15)));
+   ureg_MUL(shader, ureg_writemask(m[0][0], TGSI_WRITEMASK_W), ureg_src(m[0][0]),
+            ureg_scalar(ureg_src(m[0][0]), TGSI_SWIZZLE_X));
+
+   ureg_MOV(shader, ureg_writemask(fragment, TGSI_WRITEMASK_XYZ), ureg_src(m[7][1]));
+   ureg_ADD(shader, ureg_writemask(fragment, TGSI_WRITEMASK_W), ureg_src(m[0][0]), ureg_src(m[7][1]));
+
+   for (i = 0; i < 8; ++i) {
+      ureg_release_temporary(shader, m[i][0]);
+      ureg_release_temporary(shader, m[i][1]);
+   }
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, idct->pipe);
+}
+
+static void *
 create_stage1_vert_shader(struct vl_idct *idct)
 {
    struct ureg_program *shader;
@@ -239,14 +354,14 @@ create_stage1_frag_shader(struct vl_idct *idct)
 
    for (i = 0; i < 4; ++i) {
       struct ureg_src s_addr[2] = { ureg_src(l[i][0]), ureg_src(l[i][1]) };
-      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 1), false);
+      fetch_four(shader, l[i], s_addr, ureg_DECL_sampler(shader, 0), false);
    }
 
    for (i = 0; i < idct->nr_of_render_targets; ++i) {
       increment_addr(shader, r, r_addr, true, true, i - (signed)idct->nr_of_render_targets / 2, BLOCK_HEIGHT);
 
       struct ureg_src s_addr[2] = { ureg_src(r[0]), ureg_src(r[1]) };
-      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 0), false);
+      fetch_four(shader, r, s_addr, ureg_DECL_sampler(shader, 1), false);
 
       for (j = 0; j < 4; ++j) {
          matrix_mul(shader, ureg_writemask(fragment[i], TGSI_WRITEMASK_X << j), l[j], r);
@@ -324,8 +439,8 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
    r[0] = ureg_DECL_temporary(shader);
    r[1] = ureg_DECL_temporary(shader);
 
-   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 0), false);
-   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 1), true);
+   fetch_four(shader, l, l_addr, ureg_DECL_sampler(shader, 1), false);
+   fetch_four(shader, r, r_addr, ureg_DECL_sampler(shader, 0), true);
 
    matrix_mul(shader, fragment, l, r);
 
@@ -338,6 +453,14 @@ vl_idct_stage2_frag_shader(struct vl_idct *idct, struct ureg_program *shader,
 static bool
 init_shaders(struct vl_idct *idct)
 {
+   idct->vs_mismatch = create_mismatch_vert_shader(idct);
+   if (!idct->vs_mismatch)
+      goto error_vs_mismatch;
+
+   idct->fs_mismatch = create_mismatch_frag_shader(idct);
+   if (!idct->fs_mismatch)
+      goto error_fs_mismatch;
+
    idct->vs = create_stage1_vert_shader(idct);
    if (!idct->vs)
       goto error_vs;
@@ -352,12 +475,20 @@ error_fs:
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
 
 error_vs:
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
+
+error_fs_mismatch:
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
+
+error_vs_mismatch:
    return false;
 }
 
 static void
 cleanup_shaders(struct vl_idct *idct)
 {
+   idct->pipe->delete_vs_state(idct->pipe, idct->vs_mismatch);
+   idct->pipe->delete_fs_state(idct->pipe, idct->fs_mismatch);
    idct->pipe->delete_vs_state(idct->pipe, idct->vs);
    idct->pipe->delete_fs_state(idct->pipe, idct->fs);
 }
@@ -373,6 +504,7 @@ init_state(struct vl_idct *idct)
    assert(idct);
 
    memset(&rs_state, 0, sizeof(rs_state));
+   rs_state.point_size = 1;
    rs_state.gl_rasterization_rules = true;
    idct->rs_state = idct->pipe->create_rasterizer_state(idct->pipe, &rs_state);
    if (!idct->rs_state)
@@ -442,6 +574,45 @@ cleanup_state(struct vl_idct *idct)
 }
 
 static bool
+init_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   struct pipe_resource *tex;
+   struct pipe_surface surf_templ;
+
+   assert(idct && buffer);
+
+   tex = buffer->sampler_views.individual.source->texture;
+
+   buffer->fb_state_mismatch.width = tex->width0;
+   buffer->fb_state_mismatch.height = tex->height0;
+   buffer->fb_state_mismatch.nr_cbufs = 1;
+
+   memset(&surf_templ, 0, sizeof(surf_templ));
+   surf_templ.format = tex->format;
+   surf_templ.u.tex.first_layer = 0;
+   surf_templ.u.tex.last_layer = 0;
+   surf_templ.usage = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET;
+   buffer->fb_state_mismatch.cbufs[0] = idct->pipe->create_surface(idct->pipe, tex, &surf_templ);
+
+   buffer->viewport_mismatch.scale[0] = tex->width0;
+   buffer->viewport_mismatch.scale[1] = tex->height0;
+   buffer->viewport_mismatch.scale[2] = 1;
+   buffer->viewport_mismatch.scale[3] = 1;
+
+   return true;
+}
+
+static void
+cleanup_source(struct vl_idct *idct, struct vl_idct_buffer *buffer)
+{
+   assert(idct && buffer);
+
+   pipe_surface_reference(&buffer->fb_state_mismatch.cbufs[0], NULL);
+
+   pipe_sampler_view_reference(&buffer->sampler_views.individual.source, NULL);
+}
+
+static bool
 init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
    struct pipe_resource *tex;
@@ -470,6 +641,8 @@ init_intermediate(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 
    buffer->viewport.scale[0] = tex->width0;
    buffer->viewport.scale[1] = tex->height0;
+   buffer->viewport.scale[2] = 1;
+   buffer->viewport.scale[3] = 1;
 
    return true;
 
@@ -609,13 +782,11 @@ vl_idct_cleanup(struct vl_idct *idct)
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
-                    struct pipe_sampler_view *intermediate,
-                    struct pipe_surface *destination)
+                    struct pipe_sampler_view *intermediate)
 {
    assert(buffer);
    assert(idct);
    assert(source);
-   assert(destination);
 
    memset(buffer, 0, sizeof(struct vl_idct_buffer));
 
@@ -624,15 +795,11 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
    pipe_sampler_view_reference(&buffer->sampler_views.individual.transpose, idct->transpose);
    pipe_sampler_view_reference(&buffer->sampler_views.individual.intermediate, intermediate);
 
-   if (!init_intermediate(idct, buffer))
+   if (!init_source(idct, buffer))
       return false;
 
-   buffer->viewport.scale[2] = 1;
-   buffer->viewport.scale[3] = 1;
-   buffer->viewport.translate[0] = 0;
-   buffer->viewport.translate[1] = 0;
-   buffer->viewport.translate[2] = 0;
-   buffer->viewport.translate[3] = 0;
+   if (!init_intermediate(idct, buffer))
+      return false;
 
    return true;
 }
@@ -640,13 +807,9 @@ vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
 void
 vl_idct_cleanup_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer)
 {
-   unsigned i;
-
    assert(idct && buffer);
 
-   for(i = 0; i < idct->nr_of_render_targets; ++i)
-      pipe_surface_reference(&buffer->fb_state.cbufs[i], NULL);
-
+   cleanup_source(idct, buffer);
    cleanup_intermediate(idct, buffer);
 }
 
@@ -659,11 +822,18 @@ vl_idct_flush(struct vl_idct *idct, struct vl_idct_buffer *buffer, unsigned num_
    idct->pipe->bind_rasterizer_state(idct->pipe, idct->rs_state);
    idct->pipe->bind_blend_state(idct->pipe, idct->blend);
    idct->pipe->bind_fragment_sampler_states(idct->pipe, 2, idct->samplers);
+   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
+
+   /* mismatch control */
+   idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state_mismatch);
+   idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport_mismatch);
+   idct->pipe->bind_vs_state(idct->pipe, idct->vs_mismatch);
+   idct->pipe->bind_fs_state(idct->pipe, idct->fs_mismatch);
+   util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_POINTS, 0, 1, 0, num_instances);
 
    /* first stage */
    idct->pipe->set_framebuffer_state(idct->pipe, &buffer->fb_state);
    idct->pipe->set_viewport_state(idct->pipe, &buffer->viewport);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 2, buffer->sampler_views.stage[0]);
    idct->pipe->bind_vs_state(idct->pipe, idct->vs);
    idct->pipe->bind_fs_state(idct->pipe, idct->fs);
    util_draw_arrays_instanced(idct->pipe, PIPE_PRIM_QUADS, 0, 4, 0, num_instances);
diff --git a/src/gallium/auxiliary/vl/vl_idct.h b/src/gallium/auxiliary/vl/vl_idct.h
index f5a1e5d..119a53d 100644
--- a/src/gallium/auxiliary/vl/vl_idct.h
+++ b/src/gallium/auxiliary/vl/vl_idct.h
@@ -48,6 +48,7 @@ struct vl_idct
 
    void *samplers[2];
 
+   void *vs_mismatch, *fs_mismatch;
    void *vs, *fs;
 
    struct pipe_sampler_view *matrix;
@@ -57,7 +58,10 @@ struct vl_idct
 /* a set of buffers to work with */
 struct vl_idct_buffer
 {
+   struct pipe_viewport_state viewport_mismatch;
    struct pipe_viewport_state viewport;
+
+   struct pipe_framebuffer_state fb_state_mismatch;
    struct pipe_framebuffer_state fb_state;
 
    union
@@ -65,8 +69,8 @@ struct vl_idct_buffer
       struct pipe_sampler_view *all[4];
       struct pipe_sampler_view *stage[2][2];
       struct {
-         struct pipe_sampler_view *matrix, *source;
-         struct pipe_sampler_view *transpose, *intermediate;
+         struct pipe_sampler_view *source, *matrix;
+         struct pipe_sampler_view *intermediate, *transpose;
       } individual;
    } sampler_views;
 };
@@ -99,8 +103,7 @@ vl_idct_cleanup(struct vl_idct *idct);
 bool
 vl_idct_init_buffer(struct vl_idct *idct, struct vl_idct_buffer *buffer,
                     struct pipe_sampler_view *source,
-                    struct pipe_sampler_view *intermediate,
-                    struct pipe_surface *destination);
+                    struct pipe_sampler_view *intermediate);
 
 /* cleanup a buffer of an idct instance */
 void
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 508bb9f..bf9b6cd 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -724,11 +724,9 @@ static inline void
 get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab *tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -751,7 +749,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -778,7 +775,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -811,7 +807,6 @@ get_intra_block_B14(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       break; /* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */
 }
 
@@ -819,11 +814,9 @@ static inline void
 get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab * tab;
 
    i = 0;
-   mismatch = ~dest[0];
 
    vl_vlc_needbits(&bs->vlc);
 
@@ -845,7 +838,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -871,7 +863,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -905,7 +896,6 @@ get_intra_block_B15(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       break; /* illegal, check needed to avoid buffer overflow */
    }
 
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 4); /* dump end of block code */
 }
 
@@ -913,11 +903,9 @@ static inline void
 get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quantizer_scale, short *dest)
 {
    int i, val;
-   int mismatch;
    const DCTtab *tab;
 
    i = -1;
-   mismatch = 1;
 
    vl_vlc_needbits(&bs->vlc);
    if (bs->vlc.buf >= 0x28000000) {
@@ -946,7 +934,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          bs->vlc.buf <<= 1;
          vl_vlc_needbits(&bs->vlc);
@@ -977,7 +964,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
 
          SATURATE (val);
          dest[i] = val;
-         mismatch ^= val;
 
          vl_vlc_dumpbits(&bs->vlc, 12);
          vl_vlc_needbits(&bs->vlc);
@@ -1009,7 +995,6 @@ get_non_intra_block(struct vl_mpg12_bs *bs, const int quant_matrix[64], int quan
       }
       break; /* illegal, check needed to avoid buffer overflow */
    }
-   dest[63] ^= mismatch & 1;
    vl_vlc_dumpbits(&bs->vlc, 2); /* dump end of block code */
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
index 4337e08..f96d7f0 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c
@@ -147,7 +147,6 @@ static bool
 init_idct_buffer(struct vl_mpeg12_buffer *buffer)
 {
    struct pipe_sampler_view **idct_source_sv, **mc_source_sv;
-   struct pipe_surface **idct_surfaces;
 
    struct vl_mpeg12_decoder *dec;
 
@@ -165,14 +164,10 @@ init_idct_buffer(struct vl_mpeg12_buffer *buffer)
    if (!mc_source_sv)
       goto error_mc_source_sv;
 
-   idct_surfaces = dec->mc_source->get_surfaces(dec->mc_source);
-   if (!idct_surfaces)
-      goto error_surfaces;
-
    for (i = 0; i < 3; ++i)
       if (!vl_idct_init_buffer(i == 0 ? &dec->idct_y : &dec->idct_c,
                                &buffer->idct[i], idct_source_sv[i],
-                               mc_source_sv[i], idct_surfaces[i]))
+                               mc_source_sv[i]))
          goto error_plane;
 
    return true;
@@ -181,7 +176,6 @@ error_plane:
    for (; i > 0; --i)
       vl_idct_cleanup_buffer(i == 1 ? &dec->idct_c : &dec->idct_y, &buffer->idct[i - 1]);
 
-error_surfaces:
 error_mc_source_sv:
 error_source_sv:
    return false;
-- 
2.7.4