From eaf2c738991d43ec8e7b36bed05727deaf8151b6 Mon Sep 17 00:00:00 2001
From: Chia-I Wu
Date: Fri, 29 May 2015 15:25:13 +0800
Subject: [PATCH] ilo: embed ilo_state_sol in ilo_shader

Build the stream-output state once per shader variant instead of at draw
time.  ilo_shader_state_add_variant() now calls init_sol(), which
translates pipe_stream_output_info into an ilo_state_sol stored in
struct ilo_shader (backed by ilo_shader::sol_data).

gen7_3DSTATE_STREAMOUT() and gen7_3DSTATE_SO_DECL_LIST() take that
ilo_state_sol and copy its dwords into the batch rather than deriving
them from buffer strides and pipe_stream_output_info.  gen7_draw_sol()
now emits 3DSTATE_STREAMOUT on every call; see the PRM quote added
there.  The blitter keeps its own disabled ilo_state_sol for rectlist
draws.

ilo_shader_get_kernel_so_info() now returns the shader state's original
pipe_stream_output_info; the per-kernel state is available through the
new ilo_shader_get_kernel_sol().
---
Review note: when 3DSTATE_SO_DECL_LIST is padded (decl_count <
cmd_decl_count), the number of bytes to clear is
sizeof(sol->decl[0]) * (cmd_decl_count - sol->decl_count).  Without the
parentheses the multiplication binds first and the memset() runs past
the dwords reserved by ilo_builder_batch_pointer().

 src/gallium/drivers/ilo/core/ilo_builder_3d_top.h  | 137 ++++-----------------
 src/gallium/drivers/ilo/ilo_blitter.h              |   2 +
 src/gallium/drivers/ilo/ilo_blitter_rectlist.c     |   2 +
 src/gallium/drivers/ilo/ilo_render_gen7.c          |  45 ++++---
 src/gallium/drivers/ilo/ilo_shader.c               | 108 +++++++++++++---
 src/gallium/drivers/ilo/ilo_shader.h               |   4 +
 src/gallium/drivers/ilo/ilo_state.h                |   1 +
 .../drivers/ilo/shader/ilo_shader_internal.h       |   6 +-
 8 files changed, 154 insertions(+), 151 deletions(-)

diff --git a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
index d5a4c77..3a25221 100644
--- a/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
+++ b/src/gallium/drivers/ilo/core/ilo_builder_3d_top.h
@@ -37,6 +37,7 @@
 #include "ilo_dev.h"
 #include "ilo_state_3d.h"
 #include "ilo_state_sampler.h"
+#include "ilo_state_sol.h"
 #include "ilo_builder.h"
 
 static inline void
@@ -1013,131 +1014,41 @@ gen7_disable_3DSTATE_GS(struct ilo_builder *builder)
 
 static inline void
 gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
-                       int render_stream,
-                       bool render_disable,
-                       int vertex_attrib_count,
-                       const int *buf_strides)
+                       const struct ilo_state_sol *sol)
 {
    const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) ? 5 : 3;
    uint32_t *dw;
-   int buf_mask;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) | (cmd_len - 2);
-
-   dw[1] = render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
-   if (render_disable)
-      dw[1] |= GEN7_SO_DW1_RENDER_DISABLE;
-
-   if (buf_strides) {
-      buf_mask = ((bool) buf_strides[3]) << 3 |
-                 ((bool) buf_strides[2]) << 2 |
-                 ((bool) buf_strides[1]) << 1 |
-                 ((bool) buf_strides[0]);
-      if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
-         dw[3] = buf_strides[1] << 16 | buf_strides[0];
-         dw[4] = buf_strides[3] << 16 | buf_strides[1];
-      }
-   } else {
-      buf_mask = 0;
-   }
-
-   if (buf_mask) {
-      int read_len;
-
-      dw[1] |= GEN7_SO_DW1_SO_ENABLE |
-               GEN7_SO_DW1_STATISTICS;
-      /* API_OPENGL */
-      if (true)
-         dw[1] |= GEN7_REORDER_TRAILING << GEN7_SO_DW1_REORDER_MODE__SHIFT;
-      if (ilo_dev_gen(builder->dev) < ILO_GEN(8))
-         dw[1] |= buf_mask << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
-
-      read_len = (vertex_attrib_count + 1) / 2;
-      if (!read_len)
-         read_len = 1;
-
-      dw[2] = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
-              0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
-              (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
-   } else {
-      dw[2] = 0;
+   /* see sol_set_gen7_3DSTATE_STREAMOUT() */
+   dw[1] = sol->so[0];
+   dw[2] = sol->so[1];
+   if (ilo_dev_gen(builder->dev) >= ILO_GEN(8)) {
+      dw[3] = sol->so[2];
+      dw[4] = sol->so[3];
    }
 }
 
 static inline void
 gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
-                          const struct pipe_stream_output_info *so_info)
+                          const struct ilo_state_sol *sol)
 {
    /*
     * Note that "DWord Length" has 9 bits for this command and the type of
     * cmd_len cannot be uint8_t.
     */
    uint16_t cmd_len;
-   struct {
-      int buf_selects;
-      int decl_count;
-      uint16_t decls[128];
-   } streams[4];
-   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
-   int hw_decl_count, i;
+   int cmd_decl_count;
    uint32_t *dw;
 
    ILO_DEV_ASSERT(builder->dev, 7, 8);
 
-   memset(streams, 0, sizeof(streams));
-   memset(buf_offsets, 0, sizeof(buf_offsets));
-
-   for (i = 0; i < so_info->num_outputs; i++) {
-      unsigned decl, st, buf, reg, mask;
-
-      st = so_info->output[i].stream;
-      buf = so_info->output[i].output_buffer;
-
-      /* pad with holes */
-      while (buf_offsets[buf] < so_info->output[i].dst_offset) {
-         int num_dwords;
-
-         num_dwords = so_info->output[i].dst_offset - buf_offsets[buf];
-         if (num_dwords > 4)
-            num_dwords = 4;
-
-         decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-                GEN7_SO_DECL_HOLE_FLAG |
-                ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
-         assert(streams[st].decl_count < Elements(streams[st].decls));
-         streams[st].decls[streams[st].decl_count++] = decl;
-         buf_offsets[buf] += num_dwords;
-      }
-      assert(buf_offsets[buf] == so_info->output[i].dst_offset);
-
-      reg = so_info->output[i].register_index;
-      mask = ((1 << so_info->output[i].num_components) - 1) <<
-         so_info->output[i].start_component;
-
-      decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
-             reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
-             mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
-
-      assert(streams[st].decl_count < Elements(streams[st].decls));
-
-      streams[st].buf_selects |= 1 << buf;
-      streams[st].decls[streams[st].decl_count++] = decl;
-      buf_offsets[buf] += so_info->output[i].num_components;
-   }
-
    if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) {
-      hw_decl_count = MAX4(streams[0].decl_count, streams[1].decl_count,
-                           streams[2].decl_count, streams[3].decl_count);
+      cmd_decl_count = sol->decl_count;
    } else {
       /*
        * From the Ivy Bridge PRM, volume 2 part 1, page 201:
@@ -1146,28 +1057,22 @@ gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
        *      whenever this command is issued. The "Num Entries [n]" fields
        *      still contain the actual numbers of valid decls."
        */
-      hw_decl_count = 128;
+      cmd_decl_count = 128;
    }
 
-   cmd_len = 3 + 2 * hw_decl_count;
+   cmd_len = 3 + 2 * cmd_decl_count;
 
    ilo_builder_batch_pointer(builder, cmd_len, &dw);
 
    dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
-   dw[1] = streams[3].buf_selects << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
-           streams[2].buf_selects << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
-           streams[1].buf_selects << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
-           streams[0].buf_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
-   dw[2] = streams[3].decl_count << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
-           streams[2].decl_count << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
-           streams[1].decl_count << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
-           streams[0].decl_count << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
-   dw += 3;
-
-   for (i = 0; i < hw_decl_count; i++) {
-      dw[0] = streams[1].decls[i] << 16 | streams[0].decls[i];
-      dw[1] = streams[3].decls[i] << 16 | streams[2].decls[i];
-      dw += 2;
+   /* see sol_set_gen7_3DSTATE_SO_DECL_LIST() */
+   dw[1] = sol->so[4];
+   dw[2] = sol->so[5];
+   memcpy(&dw[3], sol->decl, sizeof(sol->decl[0]) * sol->decl_count);
+
+   if (sol->decl_count < cmd_decl_count) {
+      memset(&dw[3 + 2 * sol->decl_count], 0, sizeof(sol->decl[0]) *
+            (cmd_decl_count - sol->decl_count));
    }
 }
 
diff --git a/src/gallium/drivers/ilo/ilo_blitter.h b/src/gallium/drivers/ilo/ilo_blitter.h
index 072f0f7..3d02063 100644
--- a/src/gallium/drivers/ilo/ilo_blitter.h
+++ b/src/gallium/drivers/ilo/ilo_blitter.h
@@ -61,6 +61,8 @@ struct ilo_blitter {
    struct ilo_ve_state ve;
    struct pipe_draw_info draw;
 
+   struct ilo_state_sol sol;
+
    struct ilo_state_viewport vp;
    uint32_t vp_data[20];
 
diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
index 9d43195..b2b839c 100644
--- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
+++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c
@@ -64,6 +64,8 @@ ilo_blitter_set_invariants(struct ilo_blitter *blitter)
    blitter->draw.mode = ILO_PRIM_RECTANGLES;
    blitter->draw.count = 3;
 
+   ilo_state_sol_init_disabled(&blitter->sol, blitter->ilo->dev, false);
+
    /**
     * From the Haswell PRM, volume 7, page 615:
     *
diff --git a/src/gallium/drivers/ilo/ilo_render_gen7.c b/src/gallium/drivers/ilo/ilo_render_gen7.c
index 0931a77..95884a0 100644
--- a/src/gallium/drivers/ilo/ilo_render_gen7.c
+++ b/src/gallium/drivers/ilo/ilo_render_gen7.c
@@ -420,7 +420,7 @@ gen7_draw_sol(struct ilo_render *r,
               const struct ilo_state_vector *vec,
               struct ilo_render_draw_session *session)
 {
-   const struct pipe_stream_output_info *so_info;
+   const struct ilo_state_sol *sol;
    const struct ilo_shader_state *shader;
    bool dirty_sh = false;
 
@@ -433,13 +433,16 @@ gen7_draw_sol(struct ilo_render *r,
       dirty_sh = DIRTY(VS);
    }
 
-   so_info = ilo_shader_get_kernel_so_info(shader);
+   sol = ilo_shader_get_kernel_sol(shader);
 
    /* 3DSTATE_SO_BUFFER */
    if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
        vec->so.enabled) {
+      const struct pipe_stream_output_info *so_info;
       int i;
 
+      so_info = ilo_shader_get_kernel_so_info(shader);
+
       for (i = 0; i < vec->so.count; i++) {
          const int stride = so_info->stride[i] * 4; /* in bytes */
 
@@ -452,22 +455,30 @@ gen7_draw_sol(struct ilo_render *r,
 
    /* 3DSTATE_SO_DECL_LIST */
    if (dirty_sh && vec->so.enabled)
-      gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
-
-   /* 3DSTATE_STREAMOUT */
-   if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
-      const int output_count = ilo_shader_get_kernel_param(shader,
-            ILO_KERNEL_OUTPUT_COUNT);
-      int buf_strides[4] = { 0, 0, 0, 0 };
-      int i;
+      gen7_3DSTATE_SO_DECL_LIST(r->builder, sol);
 
-      for (i = 0; i < vec->so.count; i++)
-         buf_strides[i] = so_info->stride[i] * 4;
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 196-197:
+    *
+    *     "Anytime the SOL unit MMIO registers or non-pipeline state are
+    *      written, the SOL unit needs to receive a pipeline state update with
+    *      SOL unit dirty state for information programmed in MMIO/NP to get
+    *      loaded into the SOL unit.
+    *
+    *      The SOL unit incorrectly double buffers MMIO/NP registers and only
+    *      moves them into the design for usage when control topology is
+    *      received with the SOL unit dirty state.
+    *
+    *      If the state does not change, need to resend the same state.
+    *
+    *      Because of corruption, software must flush the whole fixed function
+    *      pipeline when 3DSTATE_STREAMOUT changes state."
+    *
+    * The first and fourth paragraphs are gone on Gen7.5+.
+    */
 
-      gen7_3DSTATE_STREAMOUT(r->builder, 0,
-            vec->rasterizer->state.rasterizer_discard,
-            output_count, buf_strides);
-   }
+   /* 3DSTATE_STREAMOUT */
+   gen7_3DSTATE_STREAMOUT(r->builder, sol);
 }
 
 static void
@@ -717,7 +728,7 @@ gen7_rectlist_vs_to_sf(struct ilo_render *r,
    gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
    gen7_disable_3DSTATE_GS(r->builder);
 
-   gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
+   gen7_3DSTATE_STREAMOUT(r->builder, &blitter->sol);
 
    gen6_3DSTATE_CLIP(r->builder, &blitter->fb.rs);
 
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index af46706..e9eb042 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -557,39 +557,103 @@ ilo_shader_state_search_variant(struct ilo_shader_state *state,
 }
 
 static void
-copy_so_info(struct ilo_shader *sh,
-             const struct pipe_stream_output_info *so_info)
+init_sol(struct ilo_shader *kernel,
+         const struct ilo_dev *dev,
+         const struct pipe_stream_output_info *so_info,
+         bool rasterizer_discard)
 {
-   unsigned i, attr;
+   struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
+   unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
+   struct ilo_state_sol_info info;
+   unsigned i;
 
-   if (!so_info->num_outputs)
+   if (!so_info->num_outputs) {
+      ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
       return;
+   }
+
+   memset(&info, 0, sizeof(info));
+   info.data = kernel->sol_data;
+   info.data_size = sizeof(kernel->sol_data);
+   info.sol_enable = true;
+   info.stats_enable = true;
+   info.tristrip_reorder = GEN7_REORDER_TRAILING;
+   info.render_disable = rasterizer_discard;
+   info.render_stream = 0;
+
+   for (i = 0; i < 4; i++) {
+      info.buffer_strides[i] = so_info->stride[i] * 4;
 
-   sh->so_info = *so_info;
+      info.streams[i].cv_vue_attr_count = kernel->out.count;
+      info.streams[i].decls = decls[i];
+   }
 
+   memset(decls, 0, sizeof(decls));
+   memset(buf_offsets, 0, sizeof(buf_offsets));
    for (i = 0; i < so_info->num_outputs; i++) {
+      const unsigned stream = so_info->output[i].stream;
+      const unsigned buffer = so_info->output[i].output_buffer;
+      struct ilo_state_sol_decl_info *decl;
+      unsigned attr;
+
       /* figure out which attribute is sourced */
-      for (attr = 0; attr < sh->out.count; attr++) {
-         const int reg_idx = sh->out.register_indices[attr];
+      for (attr = 0; attr < kernel->out.count; attr++) {
+         const int reg_idx = kernel->out.register_indices[attr];
          if (reg_idx == so_info->output[i].register_index)
             break;
       }
-
-      if (attr < sh->out.count) {
-         sh->so_info.output[i].register_index = attr;
-      }
-      else {
+      if (attr >= kernel->out.count) {
          assert(!"stream output an undefined register");
-         sh->so_info.output[i].register_index = 0;
+         attr = 0;
       }
 
+      if (info.streams[stream].vue_read_count < attr + 1)
+         info.streams[stream].vue_read_count = attr + 1;
+
+      /* pad with holes first */
+      while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
+         int num_dwords;
+
+         num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
+         if (num_dwords > 4)
+            num_dwords = 4;
+
+         assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+         decl = &decls[stream][info.streams[stream].decl_count];
+
+         decl->attr = 0;
+         decl->is_hole = true;
+         decl->component_base = 0;
+         decl->component_count = num_dwords;
+         decl->buffer = buffer;
+
+         info.streams[stream].decl_count++;
+         buf_offsets[buffer] += num_dwords;
+      }
+      assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
+
+      assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
+      decl = &decls[stream][info.streams[stream].decl_count];
+
+      decl->attr = attr;
+      decl->is_hole = false;
       /* PSIZE is at W channel */
-      if (sh->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
+      if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
          assert(so_info->output[i].start_component == 0);
          assert(so_info->output[i].num_components == 1);
-         sh->so_info.output[i].start_component = 3;
+         decl->component_base = 3;
+         decl->component_count = 1;
+      } else {
+         decl->component_base = so_info->output[i].start_component;
+         decl->component_count = so_info->output[i].num_components;
       }
+      decl->buffer = buffer;
+
+      info.streams[stream].decl_count++;
+      buf_offsets[buffer] += so_info->output[i].num_components;
    }
+
+   ilo_state_sol_init(&kernel->sol, dev, &info);
 }
 
 /**
@@ -599,17 +663,20 @@ static struct ilo_shader *
 ilo_shader_state_add_variant(struct ilo_shader_state *state,
                              const struct ilo_shader_variant *variant)
 {
+   bool rasterizer_discard = false;
    struct ilo_shader *sh;
 
    switch (state->info.type) {
    case PIPE_SHADER_VERTEX:
       sh = ilo_shader_compile_vs(state, variant);
+      rasterizer_discard = variant->u.vs.rasterizer_discard;
       break;
    case PIPE_SHADER_FRAGMENT:
       sh = ilo_shader_compile_fs(state, variant);
       break;
    case PIPE_SHADER_GEOMETRY:
       sh = ilo_shader_compile_gs(state, variant);
+      rasterizer_discard = variant->u.gs.rasterizer_discard;
       break;
    case PIPE_SHADER_COMPUTE:
       sh = ilo_shader_compile_cs(state, variant);
@@ -625,7 +692,8 @@ ilo_shader_state_add_variant(struct ilo_shader_state *state,
 
    sh->variant = *variant;
 
-   copy_so_info(sh, &state->info.stream_output);
+   init_sol(sh, state->info.dev, &state->info.stream_output,
+         rasterizer_discard);
 
    ilo_shader_state_add_shader(state, sh);
 
@@ -1164,11 +1232,17 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
 const struct pipe_stream_output_info *
 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
 {
+   return &shader->info.stream_output;
+}
+
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
+{
    const struct ilo_shader *kernel = shader->shader;
 
    assert(kernel);
 
-   return &kernel->so_info;
+   return &kernel->sol;
 }
 
 /**
diff --git a/src/gallium/drivers/ilo/ilo_shader.h b/src/gallium/drivers/ilo/ilo_shader.h
index 8a35900..ddcd6f0 100644
--- a/src/gallium/drivers/ilo/ilo_shader.h
+++ b/src/gallium/drivers/ilo/ilo_shader.h
@@ -96,6 +96,7 @@ struct ilo_rasterizer_state;
 struct ilo_shader_cache;
 struct ilo_shader_state;
 struct ilo_shader_cso;
+struct ilo_state_sol;
 struct ilo_state_vector;
 
 struct ilo_shader_cache *
@@ -168,6 +169,9 @@ ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader);
 const struct pipe_stream_output_info *
 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader);
 
+const struct ilo_state_sol *
+ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader);
+
 const struct ilo_kernel_routing *
 ilo_shader_get_kernel_routing(const struct ilo_shader_state *shader);
 
diff --git a/src/gallium/drivers/ilo/ilo_state.h b/src/gallium/drivers/ilo/ilo_state.h
index 39d0d7e..ae4639f 100644
--- a/src/gallium/drivers/ilo/ilo_state.h
+++ b/src/gallium/drivers/ilo/ilo_state.h
@@ -32,6 +32,7 @@
 #include "core/ilo_state_cc.h"
 #include "core/ilo_state_raster.h"
 #include "core/ilo_state_sampler.h"
+#include "core/ilo_state_sol.h"
 #include "core/ilo_state_surface.h"
 #include "core/ilo_state_viewport.h"
 #include "core/ilo_state_zs.h"
diff --git a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
index d2dc2f5..603d13e 100644
--- a/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
+++ b/src/gallium/drivers/ilo/shader/ilo_shader_internal.h
@@ -28,6 +28,8 @@
 #ifndef ILO_SHADER_INTERNAL_H
 #define ILO_SHADER_INTERNAL_H
 
+#include "core/ilo_state_sol.h"
+
 #include "ilo_common.h"
 #include "ilo_state.h"
 #include "ilo_shader.h"
@@ -111,7 +113,9 @@ struct ilo_shader {
 
    bool stream_output;
    int svbi_post_inc;
-   struct pipe_stream_output_info so_info;
+
+   uint32_t sol_data[PIPE_MAX_SO_OUTPUTS][2];
+   struct ilo_state_sol sol;
 
    /* for VS stream output / rasterizer discard */
    int gs_offsets[3];
-- 
2.7.4