From a0dca4409a25b7810c28bcd64b48b3f0f159a455 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Fri, 22 Mar 2013 15:59:22 +0100 Subject: [PATCH] radeonsi: add instance divisor support v3 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit v2: reduce key size, don't copy key around too much. v3: remove key size reduction Signed-off-by: Christian König Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/radeonsi_shader.c | 67 ++++++++++++++++---------- src/gallium/drivers/radeonsi/radeonsi_shader.h | 24 +++++---- src/gallium/drivers/radeonsi/si_state.c | 44 +++++++++-------- src/gallium/drivers/radeonsi/si_state_draw.c | 18 +++++-- 4 files changed, 94 insertions(+), 59 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 0512528..5fdf46e 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -54,11 +54,9 @@ struct si_shader_context { struct radeon_llvm_context radeon_bld; - struct r600_context *rctx; struct tgsi_parse_context parse; struct tgsi_token * tokens; struct si_pipe_shader *shader; - struct si_shader_key key; unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. 
*/ LLVMValueRef const_md; LLVMValueRef const_resource; @@ -112,22 +110,41 @@ static LLVMValueRef build_indexed_load( return result; } +static LLVMValueRef get_instance_index( + struct radeon_llvm_context * radeon_bld, + unsigned divisor) +{ + struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm; + + LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID); + result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( + radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); + + if (divisor > 1) + result = LLVMBuildUDiv(gallivm->builder, result, + lp_build_const_int32(gallivm, divisor), ""); + + return result; +} + static void declare_input_vs( struct si_shader_context * si_shader_ctx, unsigned input_index, const struct tgsi_full_declaration *decl) { + struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; + unsigned divisor = si_shader_ctx->shader->key.vs.instance_divisors[input_index]; + + unsigned chan; + LLVMValueRef t_list_ptr; LLVMValueRef t_offset; LLVMValueRef t_list; LLVMValueRef attribute_offset; - LLVMValueRef buffer_index_reg; + LLVMValueRef buffer_index; LLVMValueRef args[3]; LLVMTypeRef vec4_type; LLVMValueRef input; - struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; - //struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; - unsigned chan; /* Load the T list */ t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_BUFFER); @@ -139,14 +156,20 @@ static void declare_input_vs( /* Build the attribute offset */ attribute_offset = lp_build_const_int32(base->gallivm, 0); - /* Load the buffer index, which is always stored in VGPR0 - * for Vertex Shaders */ - buffer_index_reg = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID); + if (divisor) { + /* Build index from instance ID, start instance and divisor */ + si_shader_ctx->shader->shader.uses_instanceid = true; + buffer_index = 
get_instance_index(&si_shader_ctx->radeon_bld, divisor); + } else { + /* Load the buffer index, which is always stored in VGPR0 + * for Vertex Shaders */ + buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID); + } vec4_type = LLVMVectorType(base->elem_type, 4); args[0] = t_list; args[1] = attribute_offset; - args[2] = buffer_index_reg; + args[2] = buffer_index; input = build_intrinsic(base->gallivm->builder, "llvm.SI.vs.load.input", vec4_type, args, 3, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); @@ -239,7 +262,7 @@ static void declare_input_fs( /* XXX: Handle all possible interpolation modes */ switch (decl->Interp.Interpolate) { case TGSI_INTERPOLATE_COLOR: - if (si_shader_ctx->key.flatshade) { + if (si_shader_ctx->shader->key.ps.flatshade) { interp_param = 0; } else { if (decl->Interp.Centroid) @@ -272,7 +295,7 @@ static void declare_input_fs( /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */ if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR && - si_shader_ctx->key.color_two_side) { + si_shader_ctx->shader->key.ps.color_two_side) { LLVMValueRef args[4]; LLVMValueRef face, is_face_positive; LLVMValueRef back_attr_number = @@ -351,15 +374,12 @@ static void declare_system_value( unsigned index, const struct tgsi_full_declaration *decl) { - struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm; LLVMValueRef value = 0; switch (decl->Semantic.Name) { case TGSI_SEMANTIC_INSTANCEID: - value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID); - value = LLVMBuildAdd(gallivm->builder, value, - LLVMGetParam(radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); + value = get_instance_index(radeon_bld, 1); break; case TGSI_SEMANTIC_VERTEXID: @@ -433,7 +453,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, int cbuf = target - V_008DFC_SQ_EXP_MRT; if (cbuf >= 0 && cbuf < 8) { - compressed = (si_shader_ctx->key.export_16bpc >> cbuf) & 0x1; + compressed = 
(si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1; if (compressed) si_shader_ctx->shader->spi_shader_col_format |= @@ -509,13 +529,13 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); struct gallivm_state *gallivm = bld_base->base.gallivm; - if (si_shader_ctx->key.alpha_func != PIPE_FUNC_NEVER) { + if (si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_NEVER) { LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][3]; LLVMValueRef alpha_pass = lp_build_cmp(&bld_base->base, - si_shader_ctx->key.alpha_func, + si_shader_ctx->shader->key.ps.alpha_func, LLVMBuildLoad(gallivm->builder, out_ptr, ""), - lp_build_const_float(gallivm, si_shader_ctx->key.alpha_ref)); + lp_build_const_float(gallivm, si_shader_ctx->shader->key.ps.alpha_ref)); LLVMValueRef arg = lp_build_select(&bld_base->base, alpha_pass, @@ -612,7 +632,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) } else { target = V_008DFC_SQ_EXP_MRT + color_count; if (color_count == 0 && - si_shader_ctx->key.alpha_func != PIPE_FUNC_ALWAYS) + si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) si_alpha_test(bld_base, index); color_count++; @@ -1075,8 +1095,7 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx) int si_pipe_shader_create( struct pipe_context *ctx, - struct si_pipe_shader *shader, - struct si_shader_key key) + struct si_pipe_shader *shader) { struct r600_context *rctx = (struct r600_context*)ctx; struct si_pipe_shader_selector *sel = shader->selector; @@ -1117,9 +1136,7 @@ int si_pipe_shader_create( si_shader_ctx.tokens = sel->tokens; tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); si_shader_ctx.shader = shader; - si_shader_ctx.key = key; si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; - si_shader_ctx.rctx = rctx; create_meta_data(&si_shader_ctx); create_function(&si_shader_ctx); diff --git 
a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h index 9dae742..9d3c14b 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h @@ -111,13 +111,18 @@ struct si_shader { unsigned nr_cbufs; }; -struct si_shader_key { - unsigned export_16bpc:8; - unsigned nr_cbufs:4; - unsigned color_two_side:1; - unsigned alpha_func:3; - unsigned flatshade:1; - float alpha_ref; +union si_shader_key { + struct { + unsigned export_16bpc:8; + unsigned nr_cbufs:4; + unsigned color_two_side:1; + unsigned alpha_func:3; + unsigned flatshade:1; + float alpha_ref; + } ps; + struct { + unsigned instance_divisors[PIPE_MAX_ATTRIBS]; + } vs; }; struct si_pipe_shader { @@ -132,12 +137,11 @@ struct si_pipe_shader { unsigned spi_shader_col_format; unsigned sprite_coord_enable; unsigned so_strides[4]; - struct si_shader_key key; + union si_shader_key key; }; /* radeonsi_shader.c */ -int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader, - struct si_shader_key key); +int si_pipe_shader_create(struct pipe_context *ctx, struct si_pipe_shader *shader); void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader); #endif diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index bdd41b4..ca9e8b4 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1870,30 +1870,36 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, */ /* Compute the key for the hw shader variant */ -static INLINE struct si_shader_key si_shader_selector_key(struct pipe_context *ctx, - struct si_pipe_shader_selector *sel) +static INLINE void si_shader_selector_key(struct pipe_context *ctx, + struct si_pipe_shader_selector *sel, + union si_shader_key *key) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_shader_key key; - memset(&key, 0, sizeof(key)); + 
memset(key, 0, sizeof(*key)); - if (sel->type == PIPE_SHADER_FRAGMENT) { + if (sel->type == PIPE_SHADER_VERTEX) { + unsigned i; + if (!rctx->vertex_elements) + return; + + for (i = 0; i < rctx->vertex_elements->count; ++i) + key->vs.instance_divisors[i] = rctx->vertex_elements->elements[i].instance_divisor; + + } else if (sel->type == PIPE_SHADER_FRAGMENT) { if (sel->fs_write_all) - key.nr_cbufs = rctx->framebuffer.nr_cbufs; - key.export_16bpc = rctx->export_16bpc; + key->ps.nr_cbufs = rctx->framebuffer.nr_cbufs; + key->ps.export_16bpc = rctx->export_16bpc; if (rctx->queued.named.rasterizer) { - key.color_two_side = rctx->queued.named.rasterizer->two_side; - key.flatshade = rctx->queued.named.rasterizer->flatshade; + key->ps.color_two_side = rctx->queued.named.rasterizer->two_side; + key->ps.flatshade = rctx->queued.named.rasterizer->flatshade; } if (rctx->queued.named.dsa) { - key.alpha_func = rctx->queued.named.dsa->alpha_func; - key.alpha_ref = rctx->queued.named.dsa->alpha_ref; + key->ps.alpha_func = rctx->queued.named.dsa->alpha_func; + key->ps.alpha_ref = rctx->queued.named.dsa->alpha_ref; } else { - key.alpha_func = PIPE_FUNC_ALWAYS; + key->ps.alpha_func = PIPE_FUNC_ALWAYS; } } - - return key; } /* Select the hw shader variant depending on the current state. @@ -1902,11 +1908,11 @@ int si_shader_select(struct pipe_context *ctx, struct si_pipe_shader_selector *sel, unsigned *dirty) { - struct si_shader_key key; + union si_shader_key key; struct si_pipe_shader * shader = NULL; int r; - key = si_shader_selector_key(ctx, sel); + si_shader_selector_key(ctx, sel, &key); /* Check if we don't need to change anything. 
* This path is also used for most shaders that don't need multiple @@ -1934,8 +1940,9 @@ int si_shader_select(struct pipe_context *ctx, if (unlikely(!shader)) { shader = CALLOC(1, sizeof(struct si_pipe_shader)); shader->selector = sel; + shader->key = key; - r = si_pipe_shader_create(ctx, shader, key); + r = si_pipe_shader_create(ctx, shader); if (unlikely(r)) { R600_ERR("Failed to build shader variant (type=%u) %d\n", sel->type, r); @@ -1951,10 +1958,9 @@ int si_shader_select(struct pipe_context *ctx, sel->num_shaders == 0 && shader->shader.fs_write_all) { sel->fs_write_all = 1; - key = si_shader_selector_key(ctx, sel); + si_shader_selector_key(ctx, sel, &shader->key); } - shader->key = key; sel->num_shaders++; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 0deb06f..a90a5da 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -145,7 +145,7 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s if (shader->shader.output[i].name == TGSI_SEMANTIC_STENCIL) db_shader_control |= S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(1); } - if (shader->shader.uses_kill || shader->key.alpha_func != PIPE_FUNC_ALWAYS) + if (shader->shader.uses_kill || shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS) db_shader_control |= S_02880C_KILL_ENABLE(1); exports_ps = 0; @@ -329,7 +329,7 @@ bcolor: if (ps->input[i].interpolate == TGSI_INTERPOLATE_CONSTANT || (ps->input[i].interpolate == TGSI_INTERPOLATE_COLOR && - rctx->ps_shader->current->key.flatshade)) { + rctx->ps_shader->current->key.ps.flatshade)) { tmp |= S_028644_FLAT_SHADE(1); } @@ -356,7 +356,7 @@ bcolor: tmp); if (name == TGSI_SEMANTIC_COLOR && - rctx->ps_shader->current->key.color_two_side) { + rctx->ps_shader->current->key.ps.color_two_side) { name = TGSI_SEMANTIC_BCOLOR; param_offset++; goto bcolor; @@ -369,7 +369,7 @@ bcolor: static void si_update_derived_state(struct r600_context 
*rctx) { struct pipe_context * ctx = (struct pipe_context*)rctx; - unsigned ps_dirty = 0; + unsigned vs_dirty = 0, ps_dirty = 0; if (!rctx->blitter->running) { /* Flush depth textures which need to be flushed. */ @@ -381,12 +381,20 @@ static void si_update_derived_state(struct r600_context *rctx) } } - si_shader_select(ctx, rctx->ps_shader, &ps_dirty); + si_shader_select(ctx, rctx->vs_shader, &vs_dirty); if (!rctx->vs_shader->current->pm4) { si_pipe_shader_vs(ctx, rctx->vs_shader->current); + vs_dirty = 0; + } + + if (vs_dirty) { + si_pm4_bind_state(rctx, vs, rctx->vs_shader->current->pm4); } + + si_shader_select(ctx, rctx->ps_shader, &ps_dirty); + if (!rctx->ps_shader->current->pm4) { si_pipe_shader_ps(ctx, rctx->ps_shader->current); ps_dirty = 0; -- 2.7.4