radeonsi: Maintain cache of pixel shader variants according to context state.
author Michel Dänzer <michel.daenzer@amd.com>
Thu, 23 Aug 2012 15:10:37 +0000 (17:10 +0200)
committer Michel Dänzer <michel@daenzer.net>
Mon, 27 Aug 2012 09:51:41 +0000 (11:51 +0200)
Mostly inspired by r600g commit 4acf71f01ea1edb253cd38cc059d4af1a2a40bf4
('r600g: cache shader variants instead of rebuilding v3').

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
src/gallium/drivers/radeonsi/radeonsi_pipe.h
src/gallium/drivers/radeonsi/radeonsi_shader.c
src/gallium/drivers/radeonsi/radeonsi_shader.h
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state.h
src/gallium/drivers/radeonsi/si_state_draw.c

index bec2939..989bb49 100644 (file)
@@ -126,8 +126,8 @@ struct r600_context {
        unsigned                        pa_cl_vs_out_cntl;
        /* for saving when using blitter */
        struct pipe_stencil_ref         stencil_ref;
-       struct si_pipe_shader           *ps_shader;
-       struct si_pipe_shader           *vs_shader;
+       struct si_pipe_shader_selector  *ps_shader;
+       struct si_pipe_shader_selector  *vs_shader;
        struct pipe_query               *current_render_cond;
        unsigned                        current_render_cond_mode;
        struct pipe_query               *saved_render_cond;
index 0f2aaef..671eda4 100644 (file)
@@ -282,7 +282,8 @@ static void declare_input_fs(
        switch (decl->Interp.Interpolate) {
        case TGSI_INTERPOLATE_COLOR:
                /* XXX: Flat shading hangs the GPU */
-               if (si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
+               if (si_shader_ctx->rctx->queued.named.rasterizer &&
+                   si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
 #if 0
                        intr_name = "llvm.SI.fs.interp.constant";
 #else
@@ -617,6 +618,7 @@ int si_pipe_shader_create(
        struct si_pipe_shader *shader)
 {
        struct r600_context *rctx = (struct r600_context*)ctx;
+       struct si_pipe_shader_selector *sel = shader->selector;
        struct si_shader_context si_shader_ctx;
        struct tgsi_shader_info shader_info;
        struct lp_build_tgsi_context * bld_base;
@@ -633,7 +635,7 @@ int si_pipe_shader_create(
        radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
        bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
 
-       tgsi_scan_shader(shader->tokens, &shader_info);
+       tgsi_scan_shader(sel->tokens, &shader_info);
        bld_base->info = &shader_info;
        bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
        bld_base->emit_epilogue = si_llvm_emit_epilogue;
@@ -642,7 +644,7 @@ int si_pipe_shader_create(
        bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
 
        si_shader_ctx.radeon_bld.load_input = declare_input;
-       si_shader_ctx.tokens = shader->tokens;
+       si_shader_ctx.tokens = sel->tokens;
        tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
        si_shader_ctx.shader = shader;
        si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
@@ -653,10 +655,10 @@ int si_pipe_shader_create(
        /* Dump TGSI code before doing TGSI->LLVM conversion in case the
         * conversion fails. */
        if (dump) {
-               tgsi_dump(shader->tokens, 0);
+               tgsi_dump(sel->tokens, 0);
        }
 
-       if (!lp_build_tgsi_llvm(bld_base, shader->tokens)) {
+       if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
                fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
                return -EINVAL;
        }
@@ -710,6 +712,4 @@ int si_pipe_shader_create(
 void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
 {
        si_resource_reference(&shader->bo, NULL);
-
-       memset(&shader->shader,0,sizeof(struct si_shader));
 }
index d44ee9b..aa2888c 100644 (file)
@@ -37,6 +37,25 @@ struct si_shader_io {
        bool                    centroid;
 };
 
+struct si_pipe_shader;
+
+struct si_pipe_shader_selector {
+       struct si_pipe_shader *current;
+
+       struct tgsi_token       *tokens;
+       struct pipe_stream_output_info  so;
+
+       unsigned        num_shaders;
+
+       /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
+       unsigned        type;
+
+       /* 1 when the shader contains
+        * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
+        * Used to determine whether we need to include nr_cbufs in the key */
+       unsigned        fs_write_all;
+};
+
 struct si_shader {
        unsigned                ninput;
        struct si_shader_io     input[32];
@@ -50,16 +69,17 @@ struct si_shader {
 };
 
 struct si_pipe_shader {
+       struct si_pipe_shader_selector  *selector;
+       struct si_pipe_shader           *next_variant;
        struct si_shader                shader;
        struct si_pm4_state             *pm4;
        struct si_resource              *bo;
-       struct tgsi_token               *tokens;
        unsigned                        num_sgprs;
        unsigned                        num_vgprs;
        unsigned                        spi_ps_input_ena;
        unsigned                        sprite_coord_enable;
-       struct pipe_stream_output_info  so;
        unsigned                        so_strides[4];
+       unsigned                        key;
 };
 
 /* radeonsi_shader.c */
index 5df22dd..5c2e743 100644 (file)
@@ -1717,77 +1717,200 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
  * shaders
  */
 
+/* Compute the key for the hw shader variant */
+static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
+                                             struct si_pipe_shader_selector *sel)
+{
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       unsigned key = 0;
+
+       if (sel->type == PIPE_SHADER_FRAGMENT) {
+               if (sel->fs_write_all)
+                       key |= rctx->framebuffer.nr_cbufs;
+               /*if (rctx->queued.named.rasterizer)
+                         key |= rctx->queued.named.rasterizer->flatshade << 4;*/
+               /*key |== rctx->two_side << 5;*/
+       }
+
+       return key;
+}
+
+/* Select the hw shader variant depending on the current state.
+ * (*dirty) is set to 1 if current variant was changed */
+int si_shader_select(struct pipe_context *ctx,
+                    struct si_pipe_shader_selector *sel,
+                    unsigned *dirty)
+{
+       unsigned key;
+       struct si_pipe_shader * shader = NULL;
+       int r;
+
+       key = si_shader_selector_key(ctx, sel);
+
+       /* Check if we don't need to change anything.
+        * This path is also used for most shaders that don't need multiple
+        * variants, it will cost just a computation of the key and this
+        * test. */
+       if (likely(sel->current && sel->current->key == key)) {
+               return 0;
+       }
+
+       /* lookup if we have other variants in the list */
+       if (sel->num_shaders > 1) {
+               struct si_pipe_shader *p = sel->current, *c = p->next_variant;
+
+               while (c && c->key != key) {
+                       p = c;
+                       c = c->next_variant;
+               }
+
+               if (c) {
+                       p->next_variant = c->next_variant;
+                       shader = c;
+               }
+       }
+
+       if (unlikely(!shader)) {
+               shader = CALLOC(1, sizeof(struct si_pipe_shader));
+               shader->selector = sel;
+
+               r = si_pipe_shader_create(ctx, shader);
+               if (unlikely(r)) {
+                       R600_ERR("Failed to build shader variant (type=%u, key=%u) %d\n",
+                                sel->type, key, r);
+                       sel->current = NULL;
+                       return r;
+               }
+
+               /* We don't know the value of fs_write_all property until we built
+                * at least one variant, so we may need to recompute the key (include
+                * rctx->framebuffer.nr_cbufs) after building first variant. */
+               if (sel->type == PIPE_SHADER_FRAGMENT &&
+                   sel->num_shaders == 0 &&
+                   shader->shader.fs_write_all) {
+                       sel->fs_write_all = 1;
+                       key = si_shader_selector_key(ctx, sel);
+               }
+
+               shader->key = key;
+               sel->num_shaders++;
+       }
+
+       if (dirty)
+               *dirty = 1;
+
+       shader->next_variant = sel->current;
+       sel->current = shader;
+
+       return 0;
+}
+
 static void *si_create_shader_state(struct pipe_context *ctx,
-                             const struct pipe_shader_state *state)
+                                   const struct pipe_shader_state *state,
+                                   unsigned pipe_shader_type)
 {
-       struct si_pipe_shader *shader = CALLOC_STRUCT(si_pipe_shader);
+       struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
+       int r;
 
-       shader->tokens = tgsi_dup_tokens(state->tokens);
-       shader->so = state->stream_output;
+       sel->type = pipe_shader_type;
+       sel->tokens = tgsi_dup_tokens(state->tokens);
+       sel->so = state->stream_output;
+
+       r = si_shader_select(ctx, sel, NULL);
+       if (r) {
+           free(sel);
+           return NULL;
+       }
+
+       return sel;
+}
+
+static void *si_create_fs_state(struct pipe_context *ctx,
+                               const struct pipe_shader_state *state)
+{
+       return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
+}
 
-       return shader;
+static void *si_create_vs_state(struct pipe_context *ctx,
+                               const struct pipe_shader_state *state)
+{
+       return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
 }
 
 static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pipe_shader *shader = state;
+       struct si_pipe_shader_selector *sel = state;
 
-       if (rctx->vs_shader == state)
+       if (rctx->vs_shader == sel)
                return;
 
        rctx->shader_dirty = true;
-       rctx->vs_shader = shader;
+       rctx->vs_shader = sel;
 
-       if (shader) {
-               si_pm4_bind_state(rctx, vs, shader->pm4);
-       }
+       if (sel && sel->current)
+               si_pm4_bind_state(rctx, vs, sel->current->pm4);
+       else
+               si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
 }
 
 static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pipe_shader *shader = state;
+       struct si_pipe_shader_selector *sel = state;
 
-       if (rctx->ps_shader == state)
+       if (rctx->ps_shader == sel)
                return;
 
        rctx->shader_dirty = true;
-       rctx->ps_shader = shader;
+       rctx->ps_shader = sel;
 
-       if (shader) {
-               si_pm4_bind_state(rctx, ps, shader->pm4);
-       }
+       if (sel && sel->current)
+               si_pm4_bind_state(rctx, ps, sel->current->pm4);
+       else
+               si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4);
 }
 
+static void si_delete_shader_selector(struct pipe_context *ctx,
+                                     struct si_pipe_shader_selector *sel)
+{
+       struct r600_context *rctx = (struct r600_context *)ctx;
+       struct si_pipe_shader *p = sel->current, *c;
+
+       while (p) {
+               c = p->next_variant;
+               si_pm4_delete_state(rctx, vs, p->pm4);
+               si_pipe_shader_destroy(ctx, p);
+               free(p);
+               p = c;
+       }
+
+       free(sel->tokens);
+       free(sel);
+ }
+
 static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pipe_shader *shader = (struct si_pipe_shader *)state;
+       struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
 
-       if (rctx->vs_shader == shader) {
+       if (rctx->vs_shader == sel) {
                rctx->vs_shader = NULL;
        }
 
-       si_pm4_delete_state(rctx, vs, shader->pm4);
-       free(shader->tokens);
-       si_pipe_shader_destroy(ctx, shader);
-       free(shader);
+       si_delete_shader_selector(ctx, sel);
 }
 
 static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
 {
        struct r600_context *rctx = (struct r600_context *)ctx;
-       struct si_pipe_shader *shader = (struct si_pipe_shader *)state;
+       struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
 
-       if (rctx->ps_shader == shader) {
+       if (rctx->ps_shader == sel) {
                rctx->ps_shader = NULL;
        }
 
-       si_pm4_delete_state(rctx, ps, shader->pm4);
-       free(shader->tokens);
-       si_pipe_shader_destroy(ctx, shader);
-       free(shader);
+       si_delete_shader_selector(ctx, sel);
 }
 
 /*
@@ -2269,8 +2392,8 @@ void si_init_state_functions(struct r600_context *rctx)
 
        rctx->context.set_framebuffer_state = si_set_framebuffer_state;
 
-       rctx->context.create_vs_state = si_create_shader_state;
-       rctx->context.create_fs_state = si_create_shader_state;
+       rctx->context.create_vs_state = si_create_vs_state;
+       rctx->context.create_fs_state = si_create_fs_state;
        rctx->context.bind_vs_state = si_bind_vs_shader;
        rctx->context.bind_fs_state = si_bind_ps_shader;
        rctx->context.delete_vs_state = si_delete_vs_shader;
index f53ecb7..d59624c 100644 (file)
@@ -126,11 +126,16 @@ union si_state {
        } while(0)
 
 /* si_state.c */
+struct si_pipe_shader_selector;
+
 bool si_is_format_supported(struct pipe_screen *screen,
                            enum pipe_format format,
                            enum pipe_texture_target target,
                            unsigned sample_count,
                            unsigned usage);
+int si_shader_select(struct pipe_context *ctx,
+                    struct si_pipe_shader_selector *sel,
+                    unsigned *dirty);
 void si_init_state_functions(struct r600_context *rctx);
 void si_init_config(struct r600_context *rctx);
 
index fda8b0b..95821dc 100644 (file)
@@ -317,8 +317,8 @@ static void si_update_alpha_ref(struct r600_context *rctx)
 
 static void si_update_spi_map(struct r600_context *rctx)
 {
-       struct si_shader *ps = &rctx->ps_shader->shader;
-       struct si_shader *vs = &rctx->vs_shader->shader;
+       struct si_shader *ps = &rctx->ps_shader->current->shader;
+       struct si_shader *vs = &rctx->vs_shader->current->shader;
        struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
        unsigned i, j, tmp;
 
@@ -362,36 +362,39 @@ static void si_update_spi_map(struct r600_context *rctx)
 static void si_update_derived_state(struct r600_context *rctx)
 {
        struct pipe_context * ctx = (struct pipe_context*)rctx;
+       unsigned ps_dirty = 0;
 
        if (!rctx->blitter->running) {
                if (rctx->have_depth_fb || rctx->have_depth_texture)
                        si_flush_depth_textures(rctx);
        }
 
-       if ((rctx->ps_shader->shader.fs_write_all &&
-            (rctx->ps_shader->shader.nr_cbufs != rctx->framebuffer.nr_cbufs)) ||
-           (rctx->sprite_coord_enable &&
-            (rctx->ps_shader->sprite_coord_enable != rctx->sprite_coord_enable))) {
-               si_pipe_shader_destroy(&rctx->context, rctx->ps_shader);
-       }
+       si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
 
        if (rctx->alpha_ref_dirty) {
                si_update_alpha_ref(rctx);
        }
 
-       if (!rctx->vs_shader->bo) {
-               si_pipe_shader_vs(ctx, rctx->vs_shader);
+       if (!rctx->vs_shader->current->pm4) {
+               si_pipe_shader_vs(ctx, rctx->vs_shader->current);
        }
 
-       if (!rctx->ps_shader->bo) {
-               si_pipe_shader_ps(ctx, rctx->ps_shader);
+       if (!rctx->ps_shader->current->pm4) {
+               si_pipe_shader_ps(ctx, rctx->ps_shader->current);
+               ps_dirty = 0;
        }
-       if (!rctx->ps_shader->bo) {
-               if (!rctx->dummy_pixel_shader->bo)
+       if (!rctx->ps_shader->current->bo) {
+               if (!rctx->dummy_pixel_shader->pm4)
                        si_pipe_shader_ps(ctx, rctx->dummy_pixel_shader);
-
-               if (rctx->dummy_pixel_shader->pm4)
+               else
                        si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
+
+               ps_dirty = 0;
+       }
+
+       if (ps_dirty) {
+               si_pm4_bind_state(rctx, ps, rctx->ps_shader->current->pm4);
+               rctx->shader_dirty = true;
        }
 
        if (rctx->shader_dirty) {
@@ -545,7 +548,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                r600_context_draw_opaque_count(rctx, (struct r600_so_target*)info->count_from_stream_output);
        }
 
-       rctx->vs_shader_so_strides = rctx->vs_shader->so_strides;
+       rctx->vs_shader_so_strides = rctx->vs_shader->current->so_strides;
 
        if (!si_update_draw_info_state(rctx, info))
                return;