From: Keith Whitwell Date: Thu, 8 Oct 2009 18:58:28 +0000 (+0100) Subject: llvmpipe: work on clears and coefficients X-Git-Tag: 062012170305~14503^2~258 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=0718c7700533a965d7cd06b4f67b82bbae6e66a1;p=profile%2Fivi%2Fmesa.git llvmpipe: work on clears and coefficients --- diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c index 110caaf..695ddc0 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.c +++ b/src/gallium/drivers/llvmpipe/lp_rast.c @@ -48,14 +48,17 @@ struct lp_rasterizer *lp_rast_create( void ) return rast; } -void lp_rast_bind_surfaces( struct lp_rasterizer *rast, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - const float *clear_color, - double clear_depth, - unsigned clear_stencil) +void lp_rast_bind_color( struct lp_rasterizer *rast, + struct pipe_surface *cbuf, + boolean write_color ) { pipe_surface_reference(&rast->state.cbuf, cbuf); +} + +void lp_rast_bind_zstencil( struct lp_rasterizer *rast, + struct pipe_surface *zsbuf, + boolean write_zstencil ) +{ pipe_surface_reference(&rast->state.zsbuf, zsbuf); } diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h index 492e4b0..28bb0a6 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast.h +++ b/src/gallium/drivers/llvmpipe/lp_rast.h @@ -101,27 +101,17 @@ struct lp_rast_triangle { struct lp_rast_shader_inputs *inputs; }; -struct clear_tile { - boolean do_color; - boolean do_depth_stencil; - unsigned rgba; - unsigned depth_stencil; -}; - -struct load_tile { - boolean do_color; - boolean do_depth_stencil; -}; struct lp_rasterizer *lp_rast_create( void ); -void lp_rast_bind_surfaces( struct lp_rasterizer *, - struct pipe_surface *cbuf, - struct pipe_surface *zsbuf, - const float *clear_color, - double clear_depth, - unsigned clear_stencil); +void lp_rast_bind_color( struct lp_rasterizer *, + struct pipe_surface *cbuf, + boolean write_when_done ); + +void lp_rast_bind_depth( struct lp_rasterizer *, + struct pipe_surface *zsbuf, + boolean write_when_done ); /* Begining of each tile: */ @@ -174,8 +164,7 @@ void lp_rast_store_zstencil( struct lp_rasterizer *, /* End of tile: */ -void lp_rast_end_tile( struct lp_rasterizer *rast, - boolean write_depth ); +void lp_rast_end_tile( struct lp_rasterizer *rast ); /* Shutdown: */ diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c index 9016c4b..57ac854 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.c +++ b/src/gallium/drivers/llvmpipe/lp_setup.c @@ -58,6 +58,8 @@ static void reset_context( struct setup_context *setup ) { unsigned i, j; + /* Free binner command lists: + */ for (i = 0; i < setup->tiles_x; i++) { for (j = 0; j < setup->tiles_y; j++) { struct cmd_block_list *list = &setup->tile[i][j]; @@ -73,6 +75,8 @@ static void reset_context( struct setup_context *setup ) } } + /* Free binned data: + */ { struct data_block_list *list = &setup->data; struct data_block *block, *tmp; @@ -84,6 +88,10 @@ static void reset_context( struct setup_context *setup ) list->head = list->tail; } + + /* Reset some state: + */ + setup->clear.flags = 0; } @@ -131,7 +139,7 @@ rasterize_bins( struct setup_context *setup, } } - lp_rast_finish_tile( rast ); + lp_rast_end_tile( rast ); } } @@ -144,10 +152,10 @@ static void begin_binning( struct setup_context *setup ) { if (setup->fb.color) { - if (setup->fb.clear_color) + if (setup->clear.flags & PIPE_CLEAR_COLOR) bin_everywhere( setup, lp_rast_clear_color, - &setup->clear_data ); + &setup->clear.color ); else bin_everywhere( setup, lp_rast_load_color, @@ -155,10 +163,10 @@ begin_binning( struct setup_context *setup ) } if (setup->fb.zstencil) { - if (setup->fb.clear_zstencil) + if (setup->clear.flags & PIPE_CLEAR_DEPTHSTENCIL) bin_everywhere( setup, lp_rast_clear_zstencil, - &setup->clear_data ); + &setup->clear.zstencil ); else bin_everywhere( setup, lp_rast_load_zstencil, @@ -176,7 +184,7 @@ static void execute_clears( struct setup_context *setup ) { begin_binning( setup ); - rasterize_bins( setup ); + rasterize_bins( setup, TRUE ); } @@ -192,7 +200,7 @@ set_state( struct setup_context *setup, switch (new_state) { case SETUP_ACTIVE: if (old_state == SETUP_FLUSHED) - setup_begin_binning( setup ); + begin_binning( setup ); break; case SETUP_CLEARED: @@ -203,10 +211,10 @@ set_state( struct setup_context *setup, break; case SETUP_FLUSHED: - if (old_state == SETUP_CLEAR) + if (old_state == SETUP_CLEARED) execute_clears( setup ); else - rasterize_bins( setup ); + rasterize_bins( setup, TRUE ); break; } @@ -271,15 +279,20 @@ lp_setup_clear( struct setup_context *setup, } else { set_state( setup, SETUP_CLEARED ); + setup->clear.flags |= flags; if (flags & PIPE_CLEAR_COLOR) { - memcpy(setup->clear.color, color, sizeof setup->clear.color); + util_pack_color(rgba, + setup->fb.cbuf->format, + &setup->clear.color.clear_color ); } if (flags & PIPE_CLEAR_DEPTH_STENCIL) { - setup->clear.depth = clear_depth; - setup->clear.stencil = clear_stencil; + setup->clear.zstencil.clear_zstencil = + util_pack_z_stencil(setup->fb.zsbuf->format, + depth, + stencil); } } } @@ -293,6 +306,12 @@ lp_setup_set_fs_inputs( struct setup_context *setup, memcpy( setup->interp, interp, nr * sizeof interp[0] ); } +void +lp_setup_set_shader_state( struct setup_context *setup, + const struct jit_context *jc ) +{ +} + static void first_triangle( struct setup_context *setup, @@ -324,10 +343,10 @@ lp_setup_line(struct setup_context *setup, } void -lp_setup_triangle(struct setup_context *setup, - const float (*v0)[4], - const float (*v1)[4], - const float (*v2)[4]) +lp_setup_tri(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4]) { setup->triangle( setup, v0, v1, v2 ); } diff --git a/src/gallium/drivers/llvmpipe/lp_setup.h b/src/gallium/drivers/llvmpipe/lp_setup.h index 6f560f5..7c81307 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup.h +++ b/src/gallium/drivers/llvmpipe/lp_setup.h @@ -65,6 +65,17 @@ void lp_setup_point( struct setup_context *setup, const float (*v0)[4] ); + +void +lp_setup_flush( struct setup_context *setup, + unsigned flags ); + + +void +lp_setup_bind_framebuffer( struct setup_context *setup, + struct pipe_surface *color, + struct pipe_surface *zstencil ); + void lp_setup_set_triangle_state( struct setup_context *setup, unsigned cullmode, @@ -75,6 +86,10 @@ lp_setup_set_fs_inputs( struct setup_context *setup, const enum lp_interp *interp, unsigned nr ); +void +lp_setup_set_shader_state( struct setup_context *setup, + const struct jit_context *jc ); + boolean lp_setup_is_texture_referenced( struct setup_context *setup, const struct pipe_texture *texture ); diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h index 19d163d..5722e3e 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_context.h +++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h @@ -88,9 +88,8 @@ struct setup_context { struct { unsigned flags; - float clear_color[4]; - double clear_depth; - unsigned clear_stencil; + union lp_rast_cmd_arg color; + union lp_rast_cmd_arg zstencil; } clear; enum { diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 75a0ea8..efd9112 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -38,55 +38,60 @@ /** * Compute a0 for a constant-valued coefficient (GL_FLAT shading). */ -static void constant_coef( struct tgsi_interp_coef *coef, +static void constant_coef( struct lp_rast_triangle *tri, const float (*v3)[4], unsigned vert_attr, unsigned i ) { - coef->a0[i] = v3[vert_attr][i]; - coef->dadx[i] = 0; - coef->dady[i] = 0; + tri->inputs.a0[i] = v3[vert_attr][i]; + tri->inputs.dadx[i] = 0; + tri->inputs.dady[i] = 0; } /** * Compute a0, dadx and dady for a linearly interpolated coefficient, * for a triangle. */ -static void linear_coef( struct triangle *tri, - struct tgsi_interp_coef *coef, +static void linear_coef( struct lp_rast_triangle *tri, + unsigned input, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], - unsigned vert_attr, - unsigned i) + unsigned vert_attr) { - float a1 = v1[vert_attr][i]; - float a2 = v2[vert_attr][i]; - float a3 = v3[vert_attr][i]; - - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - coef->a0[i] = (v1[vert_attr][i] - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + unsigned i; + + input *= 4; + + for (i = 0; i < NUM_CHANNELS; i++) { + float a1 = v1[vert_attr][i]; + float a2 = v2[vert_attr][i]; + float a3 = v3[vert_attr][i]; + + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + tri->inputs.dadx[input+i] = dadx; + tri->inputs.dady[input+i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + tri->inputs.a0[input+i] = (v1[vert_attr][i] - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); + } } @@ -98,30 +103,35 @@ static void linear_coef( struct triangle *tri, * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. */ -static void perspective_coef( struct triangle *tri, - struct tgsi_interp_coef *coef, +static void perspective_coef( struct lp_rast_triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], unsigned vert_attr, unsigned i) { - /* premultiply by 1/w (v[0][3] is always 1/w): - */ - float a1 = v1[vert_attr][i] * v1[0][3]; - float a2 = v2[vert_attr][i] * v2[0][3]; - float a3 = v3[vert_attr][i] * v3[0][3]; - float da12 = a1 - a2; - float da31 = a3 - a1; - float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; - float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; - - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (a1 - - (dadx * (v1[0][0] - 0.5f) + - dady * (v1[0][1] - 0.5f))); + unsigned i; + + input *= 4; + + for (i = 0; i < NUM_CHANNELS; i++) { + /* premultiply by 1/w (v[0][3] is always 1/w): + */ + float a1 = v1[vert_attr][i] * v1[0][3]; + float a2 = v2[vert_attr][i] * v2[0][3]; + float a3 = v3[vert_attr][i] * v3[0][3]; + float da12 = a1 - a2; + float da31 = a3 - a1; + float dadx = (da12 * tri->dy31 - tri->dy12 * da31) * tri->oneoverarea; + float dady = (da31 * tri->dx12 - tri->dx31 * da12) * tri->oneoverarea; + + + tri->inputs.dadx[input+i] = dadx; + tri->inputs.dady[input+i] = dady; + tri->inputs.a0[input+i] = (a1 - + (dadx * (v1[0][0] - 0.5f) + + dady * (v1[0][1] - 0.5f))); + } } @@ -132,24 +142,26 @@ static void perspective_coef( struct triangle *tri, * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. */ static void -setup_fragcoord_coef(struct triangle *tri, unsigned slot) +setup_fragcoord_coef(struct lp_rast_triangle *tri, unsigned slot) { + slot *= 4; + /*X*/ - tri->coef[slot].a0[0] = 0.0; - tri->coef[slot].dadx[0] = 1.0; - tri->coef[slot].dady[0] = 0.0; + tri->inputs.a0[slot+0] = 0.0; + tri->inputs.dadx[slot+0] = 1.0; + tri->inputs.dady[slot+0] = 0.0; /*Y*/ - tri->coef[slot].a0[1] = 0.0; - tri->coef[slot].dadx[1] = 0.0; - tri->coef[slot].dady[1] = 1.0; + tri->inputs.a0[slot+1] = 0.0; + tri->inputs.dadx[slot+1] = 0.0; + tri->inputs.dady[slot+1] = 1.0; /*Z*/ - tri->coef[slot].a0[2] = tri->position_coef.a0[2]; - tri->coef[slot].dadx[2] = tri->position_coef.dadx[2]; - tri->coef[slot].dady[2] = tri->position_coef.dady[2]; + tri->inputs.a0[slot+2] = tri->inputs.a0[2]; + tri->inputs.dadx[slot+2] = tri->inputs.dadx[2]; + tri->inputs.dady[slot+2] = tri->inputs.dady[2]; /*W*/ - tri->coef[slot].a0[3] = tri->position_coef.a0[3]; - tri->coef[slot].dadx[3] = tri->position_coef.dadx[3]; - tri->coef[slot].dady[3] = tri->position_coef.dady[3]; + tri->inputs.a0[slot+3] = tri->inputs.a0[3]; + tri->inputs.dadx[slot+3] = tri->inputs.dadx[3]; + tri->inputs.dady[slot+3] = tri->inputs.dady[3]; } @@ -158,50 +170,46 @@ setup_fragcoord_coef(struct triangle *tri, unsigned slot) * Compute the tri->coef[] array dadx, dady, a0 values. */ static void setup_tri_coefficients( struct setup_context *setup, - struct triangle *tri, + struct lp_rast_triangle *tri, const float (*v1)[4], const float (*v2)[4], const float (*v3)[4], boolean frontface ) { - const struct vertex_info *vinfo = setup->vinfo; unsigned input; /* z and w are done by linear interpolation: */ - linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 2); - linear_coef(tri, tri->position_coef, v1, v2, v3, 0, 3); + setup_fragcoord_coef(tri, 0); + linear_coef(tri, input, v1, v2, v3, vert_attr, i); - /* setup interpolation for all the remaining attributes: + /* setup interpolation for all the remaining attrbutes: */ - for (input = 0; input < vinfo->num_fs_inputs; input++) { - unsigned vert_attr = vinfo->attrib[input].src_index; + for (input = 0; input < setup->fs.nr_inputs; input++) { + unsigned vert_attr = setup->fs.input[input].src_index; unsigned i; - switch (vinfo->attrib[input].interp_mode) { - case INTERP_CONSTANT: - for (i = 0; i < NUM_CHANNELS; i++) - constant_coef(tri->coef[input], v3, vert_attr, i); + switch (setup->fs.input[input].interp_mode) { + case LP_INTERP_CONSTANT: + constant_coef(tri, input, v3, vert_attr, i); break; - case INTERP_LINEAR: - for (i = 0; i < NUM_CHANNELS; i++) - linear_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); + case LP_INTERP_LINEAR: + linear_coef(tri, input, v1, v2, v3, vert_attr, i); break; - case INTERP_PERSPECTIVE: - for (i = 0; i < NUM_CHANNELS; i++) - perspective_coef(tri, tri->coef[input], v1, v2, v3, vert_attr, i); + case LP_INTERP_PERSPECTIVE: + perspective_coef(tri, input, v1, v2, v3, vert_attr, i); break; - case INTERP_POS: + case LP_INTERP_POS: setup_fragcoord_coef(tri, input); break; - case INTERP_FACING: - tri->coef[input].a0[0] = 1.0f - frontface; - tri->coef[input].dadx[0] = 0.0; - tri->coef[input].dady[0] = 0.0; + case LP_INTERP_FACING: + tri->inputs.a0[input*4+0] = 1.0f - frontface; + tri->inputs.dadx[input*4+0] = 0.0; + tri->da[input].dady[0] = 0.0; break; default: @@ -255,7 +263,7 @@ do_triangle_ccw(struct lp_setup *setup, const float x2 = subpixel_snap(v2[0][0]); const float x3 = subpixel_snap(v3[0][0]); - struct triangle *tri = allocate_triangle( setup ); + struct lp_setup_triangle *tri = lp_setup_alloc_data( setup, sizeof *tri ); float area; float c1, c2, c3; int i;