From 03c5a0ea5cd5b3e5931d6784749f87789a016b98 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Sat, 20 Nov 2010 21:08:12 +0100 Subject: [PATCH] [g3dvl] enable stage 1&2 buffers in idct code --- src/gallium/auxiliary/vl/vl_idct.c | 128 ++++++++++++----------- src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c | 35 +++---- 2 files changed, 83 insertions(+), 80 deletions(-) diff --git a/src/gallium/auxiliary/vl/vl_idct.c b/src/gallium/auxiliary/vl/vl_idct.c index 55e0751..9f81e0b 100644 --- a/src/gallium/auxiliary/vl/vl_idct.c +++ b/src/gallium/auxiliary/vl/vl_idct.c @@ -39,7 +39,7 @@ #define BLOCK_WIDTH 8 #define BLOCK_HEIGHT 8 #define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f) -#define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f) +#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) struct vertex_shader_consts { @@ -217,13 +217,15 @@ create_transpose_frag_shader(struct vl_idct *idct) struct ureg_program *shader; struct ureg_src tc[2], sampler[2]; struct ureg_src start[2], step[2]; - struct ureg_dst fragment; + struct ureg_dst tmp, fragment; float scale[2]; shader = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (!shader) return NULL; + tmp = ureg_DECL_temporary(shader); + tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR); tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR); @@ -244,6 +246,9 @@ create_transpose_frag_shader(struct vl_idct *idct) //matrix_mul(shader, fragment, tc, sampler, start, step, scale); //ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f)); + ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[1], sampler[1]); + ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, SCALE_FACTOR_16_TO_9)); + ureg_END(shader); return ureg_create_shader_and_destroy(shader, idct->pipe); @@ -262,8 +267,6 @@ create_matrix_frag_shader(struct vl_idct *idct) if (!shader) return NULL; - tmp = ureg_DECL_temporary(shader); - tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR); tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR); @@ -281,10 +284,9 @@ create_matrix_frag_shader(struct vl_idct *idct) fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0); -// matrix_mul(shader, tmp, tc, sampler, start, step, scale); -// ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X)); + //matrix_mul(shader, tmp, tc, sampler, start, step, scale); + ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc[0], sampler[0]); - //ureg_MUL(shader, , ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f)); ureg_END(shader); @@ -304,11 +306,7 @@ xfer_buffers_map(struct vl_idct *idct) idct->tex_transfer = idct->pipe->get_transfer ( -#if 0 - idct->pipe, idct->textures.individual.intermediate, -#else - idct->pipe, idct->destination, -#endif + idct->pipe, idct->textures.individual.source, u_subresource(0, 0), PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD, &rect @@ -391,8 +389,7 @@ init_buffers(struct vl_idct *idct) template.usage = PIPE_USAGE_DYNAMIC; idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template); - //template.format = PIPE_FORMAT_R32_FLOAT; - //template.usage = PIPE_USAGE_STATIC; + template.usage = PIPE_USAGE_STATIC; idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template); for (i = 0; i < 4; ++i) { @@ -575,7 +572,8 @@ cleanup_state(struct vl_idct *idct) idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]); } -bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst) +bool +vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst) { assert(idct && pipe && dst); @@ -606,7 +604,8 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r return true; } -void vl_idct_cleanup(struct vl_idct *idct) +void +vl_idct_cleanup(struct vl_idct *idct) { idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination); idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate); @@ -619,7 +618,8 @@ void vl_idct_cleanup(struct vl_idct *idct) pipe_resource_reference(&idct->destination, NULL); } -void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block) +void +vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block) { struct vertex2f v; @@ -629,61 +629,69 @@ void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *bloc unsigned i; assert(idct); - assert(block); - - v.x = x; - v.y = y; - - for (i = 0; i < 4; ++i) { - idct->vectors[idct->num_blocks * 4 + i] = v; - } tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format); texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH; - for (i = 0; i < BLOCK_HEIGHT; ++i) - memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2); + if(block) { + v.x = x; + v.y = y; + + for (i = 0; i < 4; ++i) { + idct->vectors[idct->num_blocks * 4 + i] = v; + } + + for (i = 0; i < BLOCK_HEIGHT; ++i) + memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2); - idct->num_blocks++; + idct->num_blocks++; + } else { + for (i = 0; i < BLOCK_HEIGHT; ++i) + memset(texels + i * tex_pitch, 0, BLOCK_WIDTH * 2); + } } -void vl_idct_flush(struct vl_idct *idct) +void +vl_idct_flush(struct vl_idct *idct) { xfer_buffers_unmap(idct); idct->pipe->set_constant_buffer(idct->pipe, PIPE_SHADER_VERTEX, 0, idct->vs_const_buf); -#if 0 - /* first stage */ - idct->fb_state.cbufs[0] = idct->surfaces.intermediate; - idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); - idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); - - idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); - idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); - idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all); - idct->pipe->bind_vs_state(idct->pipe, idct->vs); - idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); - - util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4); - - /* second stage */ - idct->fb_state.cbufs[0] = idct->surfaces.destination; - idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); - idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); - - idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); - idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); - idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all); - idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all); - idct->pipe->bind_vs_state(idct->pipe, idct->vs); - idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); - - util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4); - - idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); -#endif + if(idct->num_blocks > 0) { + + /* first stage */ + idct->fb_state.cbufs[0] = idct->surfaces.intermediate; + idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); + idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); + + idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); + idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); + idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all); + idct->pipe->bind_vs_state(idct->pipe, idct->vs); + idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs); + + util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4); + + idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + /* second stage */ + idct->fb_state.cbufs[0] = idct->surfaces.destination; + idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state); + idct->pipe->set_viewport_state(idct->pipe, &idct->viewport); + + idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all); + idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state); + idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all); + idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all); + idct->pipe->bind_vs_state(idct->pipe, idct->vs); + idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs); + + util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4); + + idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + } idct->num_blocks = 0; xfer_buffers_map(idct); diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c index d08e09c..fd50193 100644 --- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c +++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c @@ -42,7 +42,6 @@ #define MACROBLOCK_HEIGHT 16 #define BLOCK_WIDTH 8 #define BLOCK_HEIGHT 8 -#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f) struct vertex_shader_consts { @@ -372,9 +371,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r) */ field = calc_field(shader); texel = fetch_ycbcr(r, shader, field); - ureg_MAD(shader, fragment, ureg_src(texel), - ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), - ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X)); + ureg_ADD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X)); ureg_release_temporary(shader, field); ureg_release_temporary(shader, texel); @@ -410,7 +407,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) field = calc_field(shader); texel = fetch_ycbcr(r, shader, field); ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler); - ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref)); + ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref)); ureg_release_temporary(shader, field); ureg_release_temporary(shader, texel); @@ -459,7 +456,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler); ureg_ENDIF(shader); - ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref)); + ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref)); ureg_release_temporary(shader, field); ureg_release_temporary(shader, texel); @@ -504,7 +501,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]); ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1])); - ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0])); + ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0])); ureg_release_temporary(shader, field); ureg_release_temporary(shader, texel); @@ -561,7 +558,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r) ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1])); - ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0])); + ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0])); ureg_release_temporary(shader, field); ureg_release_temporary(shader, texel); @@ -1315,7 +1312,7 @@ static void grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks) { - unsigned tb = 0, sb = 0; + unsigned tb = 0; unsigned x, y; assert(r); @@ -1323,10 +1320,9 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, for (y = 0; y < 2; ++y) { for (x = 0; x < 2; ++x, ++tb) { - if ((cbp >> (5 - tb)) & 1) { - vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT); - ++sb; - } + bool eb = !(cbp & (1 << (5 - tb))); + vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, eb ? NULL : blocks); + blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT; } } @@ -1334,13 +1330,12 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby, assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420); for (tb = 0; tb < 2; ++tb) { - if ((cbp >> (1 - tb)) & 1) { - if(tb == 0) - vl_idct_add_block(&r->idct_cb, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT); - else - vl_idct_add_block(&r->idct_cr, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT); - ++sb; - } + bool eb = !(cbp & (1 << (1 - tb))); + if(tb == 0) + vl_idct_add_block(&r->idct_cb, mbx, mby, eb ? NULL : blocks); + else + vl_idct_add_block(&r->idct_cr, mbx, mby, eb ? NULL : blocks); + blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT; } } -- 2.7.4