From 173871dfb988c3e9fb74a8016d2b024619a5d918 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 11 Apr 2016 14:47:19 -0400 Subject: [PATCH] freedreno/ir3: lower immeds to const Helps reduce register pressure and instruction counts for immediates that would otherwise require a mov into gpr. total instructions in shared programs: 4455332 -> 4369297 (-1.93%) total dwords in shared programs: 8807872 -> 8614432 (-2.20%) total full registers used in shared programs: 263062 -> 250846 (-4.64%) total half registers used in shader programs: 9845 -> 9845 (0.00%) total const registers used in shared programs: 1029735 -> 1466993 (42.46%) half full const instr dwords helped 0 10415 0 17861 5912 hurt 0 1157 21458 947 33 Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 7 ++- src/gallium/drivers/freedreno/a4xx/fd4_emit.c | 7 ++- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 70 +++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 4470c2a..e1d0a4f 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -659,8 +659,11 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ir3_emit_consts(vp, ring, ctx, emit->info, dirty); if (!emit->key.binning_pass) ir3_emit_consts(fp, ring, ctx, emit->info, dirty); - /* mark clean after emitting consts: */ - ctx->prog.dirty = 0; + /* mark clean after emitting consts.. a bit ugly, but since binning + * pass is emitted first, we want to do this only for main draw: + */ + if (!emit->key.binning_pass) + ctx->prog.dirty = 0; } if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) { diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 27614f0..0144ba4 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -648,8 +648,11 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, ir3_emit_consts(vp, ring, ctx, emit->info, dirty); if (!emit->key.binning_pass) ir3_emit_consts(fp, ring, ctx, emit->info, dirty); - /* mark clean after emitting consts: */ - ctx->prog.dirty = 0; + /* mark clean after emitting consts.. a bit ugly, but since binning + * pass is emitted first, we want to do this only for main draw: + */ + if (!emit->key.binning_pass) + ctx->prog.dirty = 0; } if ((dirty & FD_DIRTY_BLEND)) { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 267664c..60c2830 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -29,13 +29,16 @@ #include "freedreno_util.h" #include "ir3.h" +#include "ir3_shader.h" /* * Copy Propagate: */ struct ir3_cp_ctx { + struct ir3 *shader; struct ir3_shader_variant *so; + unsigned immediate_idx; }; /* is it a type preserving mov, with ok flags? */ @@ -233,6 +236,62 @@ static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) *dstflags &= ~IR3_REG_SABS; } +static struct ir3_register * +lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) +{ + unsigned swiz, idx, i; + + reg = ir3_reg_clone(ctx->shader, reg); + + /* in some cases, there are restrictions on (abs)/(neg) plus const.. + * so just evaluate those and clear the flags: + */ + if (new_flags & IR3_REG_SABS) { + reg->iim_val = abs(reg->iim_val); + new_flags &= ~IR3_REG_SABS; + } + + if (new_flags & IR3_REG_FABS) { + reg->fim_val = fabs(reg->fim_val); + new_flags &= ~IR3_REG_FABS; + } + + if (new_flags & IR3_REG_SNEG) { + reg->iim_val = -reg->iim_val; + new_flags &= ~IR3_REG_SNEG; + } + + if (new_flags & IR3_REG_FNEG) { + reg->fim_val = -reg->fim_val; + new_flags &= ~IR3_REG_FNEG; + } + + for (i = 0; i < ctx->immediate_idx; i++) { + swiz = i % 4; + idx = i / 4; + + if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) { + break; + } + } + + if (i == ctx->immediate_idx) { + /* need to generate a new immediate: */ + swiz = i % 4; + idx = i / 4; + ctx->so->immediates[idx].val[swiz] = reg->uim_val; + ctx->so->immediates_count = idx + 1; + ctx->immediate_idx++; + } + + new_flags &= ~IR3_REG_IMMED; + new_flags |= IR3_REG_CONST; + reg->flags = new_flags; + reg->num = i + (4 * ctx->so->first_immediate); + + return reg; +} + /** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src @@ -281,6 +340,13 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, combine_flags(&new_flags, src); if (!valid_flags(instr, n, new_flags)) { + /* See if lowering an immediate to const would help. */ + if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + debug_assert(new_flags & IR3_REG_IMMED); + instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); + return; + } + /* special case for "normal" mad instructions, we can * try swapping the first two args if that fits better. * @@ -378,6 +444,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, src_reg->flags = new_flags; src_reg->iim_val = iim_val; instr->regs[n+1] = src_reg; + } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + /* See if lowering an immediate to const would help. */ + instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); } return; @@ -484,6 +553,7 @@ void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) { struct ir3_cp_ctx ctx = { + .shader = ir, .so = so, }; -- 2.7.4