From ee839cc6ef92d37ec6a44e6036e7a2c46172a16a Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 8 Apr 2014 14:14:43 -0400 Subject: [PATCH] freedreno/a3xx: deal with optimized tex instructions Keep track of whether we actually have any sam instructions in the resulting shader, rather than using TGSI SAMP declarations. If the sam instruction is optimized out, because the result is not used, we don't want to emit texture state, etc. In fact emitting sampler state and/or setting PIXLODENABLE bit when there are no texture fetches seems to cause lockup. In theory this should never happen for a "normal" shader, unless the state tracker is wonky. But it is a very real possibility for binning pass shaders. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_compiler.c | 10 +--------- .../drivers/freedreno/a3xx/fd3_compiler_old.c | 2 +- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 18 +++++++++++++----- src/gallium/drivers/freedreno/a3xx/fd3_program.c | 6 +++--- src/gallium/drivers/freedreno/a3xx/fd3_program.h | 4 ++-- src/gallium/drivers/freedreno/a3xx/ir3.h | 4 ++-- src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 22 +++++++++++++++++++--- 7 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c index 1d99e5c..911330c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c @@ -2054,12 +2054,6 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) } } -static void -decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) -{ - ctx->so->samplers_count++; -} - /* from TGSI perspective, we actually have inputs. But most of the "inputs" * for a fragment shader are just bary.f instructions. 
The *actual* inputs * from the hw perspective are the frag_pos and optionally frag_coord and @@ -2160,8 +2154,6 @@ compile_instructions(struct fd3_compile_context *ctx) decl_out(ctx, decl); } else if (decl->Declaration.File == TGSI_FILE_INPUT) { decl_in(ctx, decl); - } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - decl_samp(ctx, decl); } break; } @@ -2320,7 +2312,7 @@ fd3_compile_shader(struct fd3_shader_variant *so, } ret = ir3_block_ra(block, so->type, key.half_precision, - so->frag_coord, so->frag_face); + so->frag_coord, so->frag_face, &so->has_samp); if (ret) goto out; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c index 76de287..ee58591 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_compiler_old.c @@ -1417,7 +1417,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) static void decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl) { - ctx->so->samplers_count++; + ctx->so->has_samp = true; } static void diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 00f1014..b1cf3fd 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -177,7 +177,7 @@ emit_textures(struct fd_ringbuffer *ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); for (i = 0; i < tex->num_samplers; i++) { static const struct fd3_sampler_stateobj dummy_sampler = {}; - struct fd3_sampler_stateobj *sampler = tex->samplers[i] ? + const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ? 
fd3_sampler_stateobj(tex->samplers[i]) : &dummy_sampler; OUT_RING(ring, sampler->texsamp0); @@ -542,11 +542,19 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) fd_wfi(ctx, ring); - if (dirty & FD_DIRTY_VERTTEX) - emit_textures(ring, SB_VERT_TEX, &ctx->verttex); + if (dirty & FD_DIRTY_VERTTEX) { + if (vp->has_samp) + emit_textures(ring, SB_VERT_TEX, &ctx->verttex); + else + dirty &= ~FD_DIRTY_VERTTEX; + } - if (dirty & FD_DIRTY_FRAGTEX) - emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex); + if (dirty & FD_DIRTY_FRAGTEX) { + if (fp->has_samp) + emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex); + else + dirty &= ~FD_DIRTY_FRAGTEX; + } ctx->dirty &= ~dirty; } diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 09cadf8..b5544e8 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -120,7 +120,7 @@ create_variant(struct fd3_shader_stateobj *so, struct fd3_shader_key key) v->inputs_count = 0; v->outputs_count = 0; v->total_in = 0; - v->samplers_count = 0; + v->has_samp = false; v->immediates_count = 0; } } else { @@ -397,7 +397,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | - COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | + COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen)); OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | @@ -475,7 +475,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | - COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | + COND(fp->has_samp > 0, 
A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen)); OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index 8d4fd57..e0866c1 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -107,8 +107,8 @@ struct fd3_shader_variant { unsigned total_in; /* sum of inputs (scalar) */ - /* samplers: */ - unsigned samplers_count; + /* do we have one or more texture sample instructions: */ + bool has_samp; /* const reg # of first immediate, ie. 1 == c1 * (not regid, because TGSI thinks in terms of vec4 registers, diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h index 0905234..872f478 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3.h +++ b/src/gallium/drivers/freedreno/a3xx/ir3.h @@ -385,8 +385,8 @@ void ir3_block_sched(struct ir3_block *block); /* register assignment: */ int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face); - + bool half_precision, bool frag_coord, bool frag_face, + bool *has_samp); #ifndef ARRAY_SIZE # define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index 4e48ede..57c68c7 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -56,6 +56,7 @@ struct ir3_ra_ctx { bool half_precision; bool frag_coord; bool frag_face; + bool has_samp; int cnt; bool error; }; @@ -654,8 +655,17 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) + if (is_tex(n)) { + /* record that at least one samp instruction survived.. 
that + * is all anything else cares about (whether to emit texture + * state and set PIXLODENABLE). We do this here, rather than when we encounter a + * SAMP decl, because (especially in binning pass shader) + * the samp instruction(s) could get eliminated if the + * result is not used. + */ + ctx->has_samp = true; regmask_set(&needs_sy, n->regs[0]); + } /* both tex/sfu appear to not always immediately consume * their src register(s): @@ -730,7 +740,8 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block) } int ir3_block_ra(struct ir3_block *block, enum shader_t type, - bool half_precision, bool frag_coord, bool frag_face) + bool half_precision, bool frag_coord, bool frag_face, + bool *has_samp) { struct ir3_ra_ctx ctx = { .block = block, @@ -739,6 +750,11 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type, .frag_coord = frag_coord, .frag_face = frag_face, }; + int ret; + ir3_shader_clear_mark(block->shader); - return block_ra(&ctx, block); + ret = block_ra(&ctx, block); + *has_samp = ctx.has_samp; + + return ret; } -- 2.7.4