From 287531772ccea82c8a6c4dab5656d751a8943524 Mon Sep 17 00:00:00 2001
From: Zack Rusin
Date: Wed, 16 Jun 2010 14:42:17 -0400
Subject: [PATCH] draw: rewrite stream output to handle all the dark corners

register masks, multiple output buffers, multiple primitives, non-linear
vertices (elts) and stride semantics.
---
 src/gallium/auxiliary/draw/draw_gs.c               |   1 -
 src/gallium/auxiliary/draw/draw_pt.h               |   3 +-
 .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c  |   2 +-
 .../draw/draw_pt_fetch_shade_pipeline_llvm.c       |   3 +-
 src/gallium/auxiliary/draw/draw_pt_so_emit.c       | 322 +++++++++++++--------
 src/gallium/auxiliary/draw/draw_vbuf.h             |   2 +-
 src/gallium/drivers/softpipe/sp_prim_vbuf.c        |  10 +-
 7 files changed, 215 insertions(+), 128 deletions(-)

diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index c2c08fd..d6430c6 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -254,7 +254,6 @@ static void draw_fetch_gs_input(struct draw_geometry_shader *shader,
    }
 }
 
-
 static void gs_flush(struct draw_geometry_shader *shader,
                      unsigned input_primitives)
 {
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 739420f..b6741ca 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -182,8 +182,7 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw );
  */
 struct pt_so_emit;
 
-void draw_pt_so_emit_prepare( struct pt_so_emit *emit,
-                              unsigned prim );
+void draw_pt_so_emit_prepare( struct pt_so_emit *emit );
 
 void draw_pt_so_emit( struct pt_so_emit *emit,
                       const struct draw_vertex_info *vert_info,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
index 599c495..43b08a0 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -104,7 +104,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle,
                             (boolean)draw->rasterizer->gl_rasterization_rules,
                             (draw->vs.edgeflag_output ? true : false) );
 
-   draw_pt_so_emit_prepare( fpme->so_emit, out_prim );
+   draw_pt_so_emit_prepare( fpme->so_emit );
 
    if (!(opt & PT_PIPELINE)) {
       draw_pt_emit_prepare( fpme->emit,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 65a9d4e..7d2de58 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -109,7 +109,8 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
                             (boolean)draw->rasterizer->gl_rasterization_rules,
                             (draw->vs.edgeflag_output ? true : false) );
 
-   draw_pt_so_emit_prepare( fpme->so_emit, out_prim );
+   draw_pt_so_emit_prepare( fpme->so_emit );
+
    if (!(opt & PT_PIPELINE)) {
       draw_pt_emit_prepare( fpme->emit,
                             out_prim,
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index f5abb79..1877afb 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -25,88 +25,35 @@
  *
  **************************************************************************/
 
-#include "util/u_memory.h"
 #include "draw/draw_context.h"
 #include "draw/draw_private.h"
 #include "draw/draw_vbuf.h"
 #include "draw/draw_vertex.h"
 #include "draw/draw_pt.h"
-#include "translate/translate.h"
-#include "translate/translate_cache.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
 struct pt_so_emit {
    struct draw_context *draw;
 
-   struct translate *translate;
+   void *buffers[PIPE_MAX_SO_BUFFERS];
 
-   struct translate_cache *cache;
-   unsigned prim;
+   unsigned input_vertex_stride;
+   const float (*inputs)[4];
 
-   const struct vertex_info *vinfo;
    boolean has_so;
-};
 
-static void
-prepare_so_emit( struct pt_so_emit *emit,
-                 const struct vertex_info *vinfo )
-{
-   struct draw_context *draw = emit->draw;
-   unsigned i;
-   struct translate_key hw_key;
-   unsigned dst_offset = 0;
-
-   if (emit->has_so) {
-      for (i = 0; i < draw->so.state.num_outputs; ++i) {
-         unsigned src_offset = (draw->so.state.register_index[i] * 4 *
-                                sizeof(float) );
-         unsigned output_format;
-         unsigned emit_sz = 0;
-         /*unsigned output_bytes = util_format_get_blocksize(output_format);
-           unsigned nr_compo = util_format_get_nr_components(output_format);*/
-
-         output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
-         emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
-
-         /* doesn't handle EMIT_OMIT */
-         assert(emit_sz != 0);
-
-         if (draw->so.state.register_mask[i] != TGSI_WRITEMASK_XYZW) {
-            /* we only support rendering with XYZW writemask*/
-            debug_printf("NOT_IMPLEMENTED(writemask with stream output) at %s: %s:%d\n",
-                         __FUNCTION__, __FILE__, __LINE__);
-         }
+   boolean single_buffer;
 
-         hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
-         hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
-         hw_key.element[i].input_buffer = 0;
-         hw_key.element[i].input_offset = src_offset;
-         hw_key.element[i].instance_divisor = 0;
-         hw_key.element[i].output_format = output_format;
-         hw_key.element[i].output_offset = dst_offset;
-
-         dst_offset += emit_sz;
-      }
-      hw_key.nr_elements = draw->so.state.num_outputs;
-      hw_key.output_stride = draw->so.state.stride;
-
-      if (!emit->translate ||
-          translate_key_compare(&emit->translate->key, &hw_key) != 0)
-      {
-         translate_key_sanitize(&hw_key);
-         emit->translate = translate_cache_find(emit->cache, &hw_key);
-      }
-   } else {
-      /* no stream output */
-      emit->translate = NULL;
-   }
-}
+   unsigned emitted_primitives;
+   unsigned emitted_vertices;
+};
 
 
-void draw_pt_so_emit_prepare( struct pt_so_emit *emit,
-                              unsigned prim )
+void draw_pt_so_emit_prepare(struct pt_so_emit *emit)
 {
    struct draw_context *draw = emit->draw;
-   boolean ok;
 
    emit->has_so = (draw->so.state.num_outputs > 0);
 
@@ -116,77 +63,224 @@ void draw_pt_so_emit_prepare( struct pt_so_emit *emit,
    /* XXX: need to flush to get prim_vbuf.c to release its allocation??
     */
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+}
 
-   emit->prim = prim;
+/**
+ * Tell whether component 'compo' (0 = X, 1 = Y, 2 = Z, 3 = W) is enabled
+ * in the TGSI writemask 'mask'.
+ */
+static boolean
+is_component_writable(unsigned mask,
+                      unsigned compo)
+{
+   switch (mask) {
+   case TGSI_WRITEMASK_NONE:
+      return FALSE;
+   case TGSI_WRITEMASK_X:
+      return compo == 0;
+   case TGSI_WRITEMASK_Y:
+      return compo == 1;
+   case TGSI_WRITEMASK_XY:
+      return compo == 0 || compo == 1;
+   case TGSI_WRITEMASK_Z:
+      return compo == 2;
+   case TGSI_WRITEMASK_XZ:
+      return compo == 0 || compo == 2;
+   case TGSI_WRITEMASK_YZ:
+      return compo == 1 || compo == 2;
+   case TGSI_WRITEMASK_XYZ:
+      return compo == 0 || compo == 1 || compo == 2;
+   case TGSI_WRITEMASK_W:
+      return compo == 3;
+   case TGSI_WRITEMASK_XW:
+      return compo == 0 || compo == 3;
+   case TGSI_WRITEMASK_YW:
+      return compo == 1 || compo == 3;
+   case TGSI_WRITEMASK_XYW:
+      return compo == 0 || compo == 1 || compo == 3;
+   case TGSI_WRITEMASK_ZW:
+      return compo == 2 || compo == 3;
+   case TGSI_WRITEMASK_XZW:
+      return compo == 0 || compo == 2 || compo == 3;
+   case TGSI_WRITEMASK_YZW:
+      return compo == 1 || compo == 2 || compo == 3;
+   case TGSI_WRITEMASK_XYZW:
+      return compo < 4;
+   default:
+      debug_assert(!"Unknown writemask in stream out");
+      return compo < 4;
+   }
+}
 
-   ok = draw->render->set_primitive(draw->render, emit->prim);
-   if (!ok) {
-      assert(0);
-      return;
+/**
+ * Append one primitive to the stream output buffers.
+ *
+ * For each of the 'num_vertices' vertices named by 'indices', copy the
+ * components enabled by every output slot's register mask from the
+ * input vertex data into the buffer selected for that slot.
+ */
+static void so_emit_prim(struct pt_so_emit *so,
+                         unsigned *indices,
+                         unsigned num_vertices)
+{
+   unsigned slot, i;
+   unsigned input_vertex_stride = so->input_vertex_stride;
+   struct draw_context *draw = so->draw;
+   const float (*input_ptr)[4];
+   const struct pipe_stream_output_state *state =
+      &draw->so.state;
+   float **buffer = 0;
+
+   input_ptr = so->inputs;
+
+   for (i = 0; i < num_vertices; ++i) {
+      const float (*input)[4];
+      /* floats already written for this vertex (single buffer mode) */
+      unsigned vertex_compos = 0;
+      /*debug_printf("%d) vertex index = %d (prim idx = %d)\n", i, indices[i], prim_idx);*/
+      input = (const float (*)[4])(
+         (const char *)input_ptr + (indices[i] * input_vertex_stride));
+      for (slot = 0; slot < state->num_outputs; ++slot) {
+         unsigned idx = state->register_index[slot];
+         unsigned writemask = state->register_mask[slot];
+         unsigned compo;
+         unsigned written_compos = 0;
+         float *buf;
+
+         buffer = (float**)&so->buffers[state->output_buffer[slot]];
+         /* In single buffer mode the outputs of a vertex are packed back
+          * to back, so continue after what the earlier slots wrote. */
+         buf = *buffer + (so->single_buffer ? vertex_compos : 0);
+
+         /*debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+           slot, vs_slot, idx);*/
+#if 1
+         assert(!util_is_inf_or_nan(input[idx][0]));
+         assert(!util_is_inf_or_nan(input[idx][1]));
+         assert(!util_is_inf_or_nan(input[idx][2]));
+         assert(!util_is_inf_or_nan(input[idx][3]));
+#endif
+         for (compo = 0; compo < 4; ++compo) {
+            if (is_component_writable(writemask, compo)) {
+               buf[written_compos++] = input[idx][compo];
+            }
+         }
+#if 0
+         debug_printf("\t\t(writemask = %d)%f %f %f %f\n",
+                      writemask,
+                      input[idx][0],
+                      input[idx][1],
+                      input[idx][2],
+                      input[idx][3]);
+#endif
+         if (so->single_buffer)
+            vertex_compos += written_compos;
+         else
+            *buffer += written_compos;
+      }
+      /* single buffer mode: step to the next vertex using the stride */
+      if (so->single_buffer)
+         *buffer = (float*) (((char*)*buffer) + state->stride);
    }
+   so->emitted_vertices += num_vertices;
+   ++so->emitted_primitives;
+}
+
+static void so_point(struct pt_so_emit *so, int idx)
+{
+   unsigned indices[1];
+
+   indices[0] = idx;
+
+   so_emit_prim(so, indices, 1);
+}
+
+static void so_line(struct pt_so_emit *so, int i0, int i1)
+{
+   unsigned indices[2];
 
-   /* Must do this after set_primitive() above: */
-   emit->vinfo = draw->render->get_vertex_info(draw->render);
+   indices[0] = i0;
+   indices[1] = i1;
 
-   prepare_so_emit( emit, emit->vinfo );
+   so_emit_prim(so, indices, 2);
 }
 
+static void so_tri(struct pt_so_emit *so, int i0, int i1, int i2)
+{
+   unsigned indices[3];
+
+   indices[0] = i0;
+   indices[1] = i1;
+   indices[2] = i2;
+
+   so_emit_prim(so, indices, 3);
+}
+
+
+#define TRIANGLE(gs,i0,i1,i2) so_tri(so,i0,i1,i2)
+#define LINE(gs,i0,i1)        so_line(so,i0,i1)
+#define POINT(gs,i0)          so_point(so,i0)
+#define FUNC so_run_linear
+#define LOCAL_VARS
+#include "draw_so_emit_tmp.h"
+#undef LOCAL_VARS
+#undef FUNC
+
+
+#define TRIANGLE(gs,i0,i1,i2) so_tri(gs,elts[i0],elts[i1],elts[i2])
+#define LINE(gs,i0,i1)        so_line(gs,elts[i0],elts[i1])
+#define POINT(gs,i0)          so_point(gs,elts[i0])
+#define FUNC so_run_elts
+#define LOCAL_VARS                         \
+   const ushort *elts = input_prims->elts;
+#include "draw_so_emit_tmp.h"
+#undef LOCAL_VARS
+#undef FUNC
+
 
 void draw_pt_so_emit( struct pt_so_emit *emit,
-                      const struct draw_vertex_info *vert_info,
-                      const struct draw_prim_info *prim_info )
+                      const struct draw_vertex_info *input_verts,
+                      const struct draw_prim_info *input_prims )
 {
-   const float (*vertex_data)[4] = (const float (*)[4])vert_info->verts->data;
-   unsigned vertex_count = vert_info->count;
-   unsigned stride = vert_info->stride;
    struct draw_context *draw = emit->draw;
-   struct translate *translate = emit->translate;
    struct vbuf_render *render = draw->render;
-   void *so_buffer;
    unsigned start, i;
 
    if (!emit->has_so)
       return;
 
-   so_buffer = draw->so.buffers[0];
+   emit->emitted_vertices = 0;
+   emit->emitted_primitives = 0;
+   emit->input_vertex_stride = input_verts->stride;
+   emit->inputs = (const float (*)[4])input_verts->verts->data;
+   for (i = 0; i < draw->so.num_buffers; ++i)
+      emit->buffers[i] = draw->so.buffers[i];
+   emit->single_buffer = TRUE;
+   for (i = 0; i < draw->so.state.num_outputs; ++i) {
+      if (draw->so.state.output_buffer[i] != 0)
+         emit->single_buffer = FALSE;
+   }
 
    /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/
    draw_do_flush( draw, DRAW_FLUSH_BACKEND );
 
-   if (vertex_count == 0)
-      return;
-
-   if (vertex_count >= UNDEFINED_VERTEX_ID) {
-      assert(0);
-      return;
-   }
-
-   /* XXX we only support single output buffer */
-   if (draw->so.num_buffers != 1) {
-      debug_printf("NOT_IMPLEMENTED(multiple stream output buffers) at %s: %s:%d\n",
-                   __FUNCTION__, __FILE__, __LINE__);
-   }
-
-   translate->set_buffer(translate, 0, vertex_data,
-                         stride, ~0);
-
-   for (start = i = 0; i < prim_info->primitive_count;
-        start += prim_info->primitive_lengths[i], i++)
+   for (start = i = 0; i < input_prims->primitive_count;
+        start += input_prims->primitive_lengths[i], i++)
    {
-      unsigned count = prim_info->primitive_lengths[i];
-
-      if (prim_info->linear) {
-         translate->run(translate, start, count,
-                        draw->instance_id, so_buffer);
-      }
-      else {
-         debug_assert(!"Stream output can't handle non-linear prims yet");
-         translate->run(translate, start, count,
-                        draw->instance_id, so_buffer);
+      unsigned count = input_prims->primitive_lengths[i];
+
+      if (input_prims->linear) {
+         so_run_linear(emit, input_prims, input_verts,
+                       start, count);
+      } else {
+         so_run_elts(emit, input_prims, input_verts,
+                     start, count);
       }
    }
 
-   render->set_stream_output_info(render, 0, vertex_count);
+   render->set_stream_output_info(render,
+                                  emit->emitted_primitives,
+                                  emit->emitted_vertices);
 }
 
 
@@ -197,19 +291,11 @@ struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw )
       return NULL;
 
    emit->draw = draw;
-   emit->cache = translate_cache_create();
-   if (!emit->cache) {
-      FREE(emit);
-      return NULL;
-   }
 
    return emit;
 }
 
 void draw_pt_so_emit_destroy( struct pt_so_emit *emit )
 {
-   if (emit->cache)
-      translate_cache_destroy(emit->cache);
-
    FREE(emit);
 }
diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h
index 8d97682..e32803c 100644
--- a/src/gallium/auxiliary/draw/draw_vbuf.h
+++ b/src/gallium/auxiliary/draw/draw_vbuf.h
@@ -123,7 +123,7 @@ struct vbuf_render {
     * Called after writing data to the stream out buffers
     */
    void (*set_stream_output_info)( struct vbuf_render *vbufr,
-                                   unsigned buffer_index,
+                                   unsigned primitive_count,
                                    unsigned vertices_count );
 };
 
diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
index ddfe56f..c60249d 100644
--- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c
+++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c
@@ -543,15 +543,17 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr)
 }
 
 static void
-sp_vbuf_so_info(struct vbuf_render *vbr, uint buffer, uint vertices)
+sp_vbuf_so_info(struct vbuf_render *vbr, uint primitives, uint vertices)
 {
    struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr);
    struct softpipe_context *softpipe = cvbr->softpipe;
+   unsigned i;
 
-   softpipe->so_target.so_count[buffer] += vertices;
+   for (i = 0; i < softpipe->so_target.num_buffers; ++i) {
+      softpipe->so_target.so_count[i] += vertices;
+   }
 
-   softpipe->so_stats.num_primitives_written =
-      vertices / u_vertices_per_prim(cvbr->prim);
+   softpipe->so_stats.num_primitives_written = primitives;
    softpipe->so_stats.primitives_storage_needed =
       vertices * 4 /*sizeof(float|int32)*/ * 4 /*x,y,z,w*/;
 }
-- 
2.7.4