[g3dvl] enable stage 1&2 buffers in idct code
authorChristian König <deathsimple@vodafone.de>
Sat, 20 Nov 2010 20:08:12 +0000 (21:08 +0100)
committerChristian König <deathsimple@vodafone.de>
Sat, 20 Nov 2010 20:08:12 +0000 (21:08 +0100)
src/gallium/auxiliary/vl/vl_idct.c
src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c

index 55e0751..9f81e0b 100644 (file)
@@ -39,7 +39,7 @@
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
 #define SCALE_FACTOR_16_TO_12 (32768.0f / 2048.0f)
-#define SCALE_FACTOR_9_TO_16 (256.0f / 32768.0f)
+#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 struct vertex_shader_consts
 {
@@ -217,13 +217,15 @@ create_transpose_frag_shader(struct vl_idct *idct)
    struct ureg_program *shader;
    struct ureg_src tc[2], sampler[2];
    struct ureg_src start[2], step[2];
-   struct ureg_dst fragment;
+   struct ureg_dst tmp, fragment;
    float scale[2];
 
    shader = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (!shader)
       return NULL;
 
+   tmp = ureg_DECL_temporary(shader);
+
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
 
@@ -244,6 +246,9 @@ create_transpose_frag_shader(struct vl_idct *idct)
    //matrix_mul(shader, fragment, tc, sampler, start, step, scale);
    //ureg_MOV(shader, fragment, ureg_imm1f(shader, 0.0f));
 
+   ureg_TEX(shader, tmp, TGSI_TEXTURE_2D, tc[1], sampler[1]);
+   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_imm1f(shader, SCALE_FACTOR_16_TO_9));
+
    ureg_END(shader);
 
    return ureg_create_shader_and_destroy(shader, idct->pipe);
@@ -262,8 +267,6 @@ create_matrix_frag_shader(struct vl_idct *idct)
    if (!shader)
       return NULL;
 
-   tmp = ureg_DECL_temporary(shader);
-
    tc[0] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_TEX, TGSI_INTERPOLATE_LINEAR);
    tc[1] = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_BLOCK, TGSI_INTERPOLATE_LINEAR);
 
@@ -281,10 +284,9 @@ create_matrix_frag_shader(struct vl_idct *idct)
 
    fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
 
-//   matrix_mul(shader, tmp, tc, sampler, start, step, scale);
-//   ureg_MUL(shader, fragment, ureg_src(tmp), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_9_TO_16), TGSI_SWIZZLE_X));
+   //matrix_mul(shader, tmp, tc, sampler, start, step, scale);
+
    ureg_TEX(shader, fragment, TGSI_TEXTURE_2D, tc[0], sampler[0]);
-   //ureg_MUL(shader, , ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(shader, 1.0f));
 
    ureg_END(shader);
 
@@ -304,11 +306,7 @@ xfer_buffers_map(struct vl_idct *idct)
 
    idct->tex_transfer = idct->pipe->get_transfer
    (
-#if 0
-      idct->pipe, idct->textures.individual.intermediate,
-#else
-      idct->pipe, idct->destination,
-#endif
+      idct->pipe, idct->textures.individual.source,
       u_subresource(0, 0),
       PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD,
       &rect
@@ -391,8 +389,7 @@ init_buffers(struct vl_idct *idct)
    template.usage = PIPE_USAGE_DYNAMIC;
    idct->textures.individual.source = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
-   //template.format = PIPE_FORMAT_R32_FLOAT;
-   //template.usage = PIPE_USAGE_STATIC;
+   template.usage = PIPE_USAGE_STATIC;
    idct->textures.individual.intermediate = idct->pipe->screen->resource_create(idct->pipe->screen, &template);
 
    for (i = 0; i < 4; ++i) {
@@ -575,7 +572,8 @@ cleanup_state(struct vl_idct *idct)
       idct->pipe->delete_sampler_state(idct->pipe, idct->samplers.all[i]);
 }
 
-bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
+bool
+vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_resource *dst)
 {
    assert(idct && pipe && dst);
 
@@ -606,7 +604,8 @@ bool vl_idct_init(struct vl_idct *idct, struct pipe_context *pipe, struct pipe_r
    return true;
 }
 
-void vl_idct_cleanup(struct vl_idct *idct)
+void
+vl_idct_cleanup(struct vl_idct *idct)
 {
    idct->pipe->screen->tex_surface_destroy(idct->surfaces.destination);
    idct->pipe->screen->tex_surface_destroy(idct->surfaces.intermediate);
@@ -619,7 +618,8 @@ void vl_idct_cleanup(struct vl_idct *idct)
    pipe_resource_reference(&idct->destination, NULL);
 }
 
-void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
+void
+vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *block)
 {
    struct vertex2f v;
 
@@ -629,61 +629,69 @@ void vl_idct_add_block(struct vl_idct *idct, unsigned x, unsigned y, short *bloc
    unsigned i;
 
    assert(idct);
-   assert(block);
-
-   v.x = x;
-   v.y = y;
-
-   for (i = 0; i < 4; ++i) {
-      idct->vectors[idct->num_blocks * 4 + i] = v;
-   }
 
    tex_pitch = idct->tex_transfer->stride / util_format_get_blocksize(idct->tex_transfer->resource->format);
    texels = idct->texels + y * tex_pitch * BLOCK_HEIGHT + x * BLOCK_WIDTH;
 
-   for (i = 0; i < BLOCK_HEIGHT; ++i)
-      memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
+   if(block) {
+      v.x = x;
+      v.y = y;
+
+      for (i = 0; i < 4; ++i) {
+         idct->vectors[idct->num_blocks * 4 + i] = v;
+      }
+
+      for (i = 0; i < BLOCK_HEIGHT; ++i)
+         memcpy(texels + i * tex_pitch, block + i * BLOCK_WIDTH, BLOCK_WIDTH * 2);
 
-   idct->num_blocks++;
+      idct->num_blocks++;
+   } else {
+      for (i = 0; i < BLOCK_HEIGHT; ++i)
+         memset(texels + i * tex_pitch, 0, BLOCK_WIDTH * 2);      
+   }
 }
 
-void vl_idct_flush(struct vl_idct *idct)
+void
+vl_idct_flush(struct vl_idct *idct)
 {
    xfer_buffers_unmap(idct);
 
    idct->pipe->set_constant_buffer(idct->pipe, PIPE_SHADER_VERTEX, 0, idct->vs_const_buf);
 
-#if 0
-   /* first stage */
-   idct->fb_state.cbufs[0] = idct->surfaces.intermediate;
-   idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-   idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
-
-   idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-   idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-   idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
-   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
-   idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
-
-   util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
-
-   /* second stage */
-   idct->fb_state.cbufs[0] = idct->surfaces.destination;
-   idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
-   idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
-
-   idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
-   idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
-   idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
-   idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
-   idct->pipe->bind_vs_state(idct->pipe, idct->vs);
-   idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
-
-   util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
-
-   idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
-#endif
+   if(idct->num_blocks > 0) {
+
+      /* first stage */
+      idct->fb_state.cbufs[0] = idct->surfaces.intermediate;
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->transpose_fs);
+
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+
+      idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+
+      /* second stage */
+      idct->fb_state.cbufs[0] = idct->surfaces.destination;
+      idct->pipe->set_framebuffer_state(idct->pipe, &idct->fb_state);
+      idct->pipe->set_viewport_state(idct->pipe, &idct->viewport);
+
+      idct->pipe->set_vertex_buffers(idct->pipe, 2, idct->vertex_bufs.all);
+      idct->pipe->bind_vertex_elements_state(idct->pipe, idct->vertex_elems_state);
+      idct->pipe->set_fragment_sampler_views(idct->pipe, 4, idct->sampler_views.all);
+      idct->pipe->bind_fragment_sampler_states(idct->pipe, 4, idct->samplers.all);
+      idct->pipe->bind_vs_state(idct->pipe, idct->vs);
+      idct->pipe->bind_fs_state(idct->pipe, idct->matrix_fs);
+
+      util_draw_arrays(idct->pipe, PIPE_PRIM_QUADS, 0, idct->num_blocks * 4);
+
+      idct->pipe->flush(idct->pipe, PIPE_FLUSH_RENDER_CACHE, NULL);
+   }
 
    idct->num_blocks = 0;
    xfer_buffers_map(idct);
index d08e09c..fd50193 100644 (file)
@@ -42,7 +42,6 @@
 #define MACROBLOCK_HEIGHT 16
 #define BLOCK_WIDTH 8
 #define BLOCK_HEIGHT 8
-#define SCALE_FACTOR_16_TO_9 (32768.0f / 256.0f)
 
 struct vertex_shader_consts
 {
@@ -372,9 +371,7 @@ create_intra_frag_shader(struct vl_mpeg12_mc_renderer *r)
     */
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
-   ureg_MAD(shader, fragment, ureg_src(texel), 
-            ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), 
-            ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -410,7 +407,7 @@ create_frame_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    field = calc_field(shader);
    texel = fetch_ycbcr(r, shader, field);
    ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc, sampler);
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -459,7 +456,7 @@ create_field_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
       ureg_TEX(shader, ref, TGSI_TEXTURE_2D, tc[0], sampler);
    ureg_ENDIF(shader);
 
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -504,7 +501,7 @@ create_frame_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
    ureg_TEX(shader, ref[1], TGSI_TEXTURE_2D, tc[1], sampler[1]);
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -561,7 +558,7 @@ create_field_bi_pred_frag_shader(struct vl_mpeg12_mc_renderer *r)
 
    ureg_LRP(shader, ref[0], ureg_scalar(ureg_imm1f(shader, 0.5f), TGSI_SWIZZLE_X), ureg_src(ref[0]), ureg_src(ref[1]));
 
-   ureg_MAD(shader, fragment, ureg_src(texel), ureg_scalar(ureg_imm1f(shader, SCALE_FACTOR_16_TO_9), TGSI_SWIZZLE_X), ureg_src(ref[0]));
+   ureg_ADD(shader, fragment, ureg_src(texel), ureg_src(ref[0]));
 
    ureg_release_temporary(shader, field);
    ureg_release_temporary(shader, texel);
@@ -1315,7 +1312,7 @@ static void
 grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
             enum pipe_mpeg12_dct_type dct_type, unsigned cbp, short *blocks)
 {
-   unsigned tb = 0, sb = 0;
+   unsigned tb = 0;
    unsigned x, y;
 
    assert(r);
@@ -1323,10 +1320,9 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
 
    for (y = 0; y < 2; ++y) {
       for (x = 0; x < 2; ++x, ++tb) {
-         if ((cbp >> (5 - tb)) & 1) {
-            vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
-            ++sb;
-         }
+         bool eb = !(cbp  & (1 << (5 - tb)));
+         vl_idct_add_block(&r->idct_y, mbx * 2 + x, mby * 2 + y, eb ? NULL : blocks);
+         blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
       }
    }
 
@@ -1334,13 +1330,12 @@ grab_blocks(struct vl_mpeg12_mc_renderer *r, unsigned mbx, unsigned mby,
    assert(r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420);
 
    for (tb = 0; tb < 2; ++tb) {
-      if ((cbp >> (1 - tb)) & 1) {
-         if(tb == 0)
-            vl_idct_add_block(&r->idct_cb, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
-         else
-            vl_idct_add_block(&r->idct_cr, mbx, mby, blocks + sb * BLOCK_WIDTH * BLOCK_HEIGHT);
-         ++sb;
-      }
+      bool eb = !(cbp & (1 << (1 - tb)));
+      if(tb == 0)
+         vl_idct_add_block(&r->idct_cb, mbx, mby, eb ? NULL : blocks);
+      else
+         vl_idct_add_block(&r->idct_cr, mbx, mby, eb ? NULL : blocks);
+      blocks += eb ? 0 : BLOCK_WIDTH * BLOCK_HEIGHT;
    }
 }