From f370e95421f553ace931a02743c96be80fd62dc8 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sun, 29 Mar 2015 11:24:57 -0400 Subject: [PATCH] freedreno/ir3: handle const/immed/abs/neg in cp Be smarter about propagating copies from const or immed, or with abs/neg modifiers. Also, realize that absneg.s and absneg.f are really "fancy" mov instructions. This opens up the possibility to remove more copies. It helps the TGSI frontend a bit, but will be really needed for the NIR f/e which builds everything up in SSA form (ie. will *always* insert a mov from const or immediate). Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/ir3/ir3.h | 6 + src/gallium/drivers/freedreno/ir3/ir3_cp.c | 333 ++++++++++++++++++++++++-- src/gallium/drivers/freedreno/ir3/ir3_group.c | 6 - 3 files changed, 314 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 8310286..f424f73 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -525,6 +525,12 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg) return NULL; } +static inline bool conflicts(struct ir3_instruction *a, + struct ir3_instruction *b) +{ + return (a && b) && (a != b); +} + static inline bool reg_gpr(struct ir3_register *r) { if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR)) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index ceebf80..3eb85f6 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -32,29 +32,47 @@ /* * Copy Propagate: - * - * We could eventually drop this, if the front-end did not insert any - * mov's.. For now keeping it as a separate pass since that is less - * painful than updating the existing frontend. It is expected that - * with an eventual new NIR based frontend that we won't need this. */ -static void block_cp(struct ir3_block *block); -static struct ir3_instruction * instr_cp(struct ir3_instruction *instr); -static bool is_eligible_mov(struct ir3_instruction *instr) +/* Is it a non-transformative (ie. not type changing) mov? This can + * also include absneg.s/absneg.f, which for the most part can be + * treated as a mov (single src argument). + */ +static bool is_same_type_mov(struct ir3_instruction *instr) { + struct ir3_register *dst = instr->regs[0]; + + /* mov's that write to a0.x or p0.x are special: */ + if (dst->num == regid(REG_P0, 0)) + return false; + if (dst->num == regid(REG_A0, 0)) + return false; + if ((instr->category == 1) && - (instr->cat1.src_type == instr->cat1.dst_type)) { + (instr->cat1.src_type == instr->cat1.dst_type)) + return true; + if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) || + (instr->opc == OPC_ABSNEG_S))) + return true; + return false; +} + +/* is it a type preserving mov, with ok flags? */ +static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) +{ + if (is_same_type_mov(instr)) { struct ir3_register *dst = instr->regs[0]; struct ir3_register *src = instr->regs[1]; struct ir3_instruction *src_instr = ssa(src); if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV)) return false; - /* TODO: propagate abs/neg modifiers if possible */ - if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | - IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_RELATIV)) + if (src->flags & IR3_REG_RELATIV) return false; + if (!allow_flags) + if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; if (!src_instr) return false; /* TODO: remove this hack: */ @@ -65,10 +83,270 @@ static bool is_eligible_mov(struct ir3_instruction *instr) return false; } +static unsigned cp_flags(unsigned flags) +{ + /* only considering these flags (at least for now): */ + flags &= (IR3_REG_CONST | IR3_REG_IMMED | + IR3_REG_FNEG | IR3_REG_FABS | + IR3_REG_SNEG | IR3_REG_SABS | + IR3_REG_BNOT); + return flags; +} + +static bool valid_flags(struct ir3_instruction *instr, unsigned n, + unsigned flags) +{ + unsigned valid_flags; + flags = cp_flags(flags); + + /* clear flags that are 'ok' */ + switch (instr->category) { + case 1: + case 5: + /* no flags allowed */ + if (flags) + return false; + break; + case 6: + valid_flags = IR3_REG_IMMED; + if (flags & ~valid_flags) + return false; + break; + case 2: + valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST; + + if (ir3_cat2_immed(instr->opc)) + valid_flags |= IR3_REG_IMMED; + + if (flags & ~valid_flags) + return false; + + if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) { + unsigned m = (n ^ 1) + 1; + /* cannot deal w/ const in both srcs: + * (note that some cat2 actually only have a single src) + */ + if (m < instr->regs_count) { + struct ir3_register *reg = instr->regs[m]; + if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST)) + return false; + if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED)) + return false; + } + /* cannot be const + ABS|NEG: */ + if (flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + } + break; + case 3: + valid_flags = ir3_cat3_absneg(instr->opc) | IR3_REG_CONST; + + if (flags & ~valid_flags) + return false; + + if (flags & IR3_REG_CONST) { + /* cannot deal w/ const in 2nd src: */ + /* TODO in some common cases, like mad, we can swap + * first two args.. possibly we should allow that here + * and fixup in legalize? + */ + if (n == 1) + return false; + /* cannot be const + ABS|NEG: */ + if (flags & (IR3_REG_FABS | IR3_REG_FNEG | + IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) + return false; + } + break; + case 4: + /* seems like blob compiler avoids const as src.. */ + /* TODO double check if this is still the case on a4xx */ + if (flags & IR3_REG_CONST) + return false; + if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) + return false; + break; + } + + return true; +} + +/* propagate register flags from src to dst.. negates need special + * handling to cancel each other out. + */ +static void combine_flags(unsigned *dstflags, unsigned srcflags) +{ + /* if what we are combining into already has (abs) flags, + * we can drop (neg) from src: + */ + if (*dstflags & IR3_REG_FABS) + srcflags &= ~IR3_REG_FNEG; + if (*dstflags & IR3_REG_SABS) + srcflags &= ~IR3_REG_SNEG; + + if (srcflags & IR3_REG_FABS) + *dstflags |= IR3_REG_FABS; + if (srcflags & IR3_REG_SABS) + *dstflags |= IR3_REG_SABS; + if (srcflags & IR3_REG_FNEG) + *dstflags ^= IR3_REG_FNEG; + if (srcflags & IR3_REG_SNEG) + *dstflags ^= IR3_REG_SNEG; + if (srcflags & IR3_REG_BNOT) + *dstflags ^= IR3_REG_BNOT; +} + +static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags); +/** + * Handle cp for a given src register. This additionally handles + * the cases of collapsing immedate/const (which replace the src + * register with a non-ssa src) or collapsing mov's from relative + * src (which needs to also fixup the address src reference by the + * instruction). + */ +static void +reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) +{ + unsigned src_flags = 0, new_flags; + struct ir3_instruction *src_instr; + + if (is_meta(instr)) { + /* meta instructions cannot fold up register + * flags.. they are usually src for texture + * fetch, etc, where we cannot specify abs/neg + */ + reg->instr = instr_cp(reg->instr, NULL); + return; + } + + src_instr = instr_cp(reg->instr, &src_flags); + + new_flags = reg->flags; + combine_flags(&new_flags, src_flags); + + reg->flags = new_flags; + reg->instr = src_instr; + + if (!valid_flags(instr, n, reg->flags)) { + /* insert an absneg.f */ + if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)) { + debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS))); + reg->instr = ir3_ABSNEG_S(instr->block, + reg->instr, cp_flags(src_flags)); + } else { + debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS))); + reg->instr = ir3_ABSNEG_F(instr->block, + reg->instr, cp_flags(src_flags)); + } + reg->flags &= ~cp_flags(src_flags); + /* send it through instr_cp() again since + * the absneg src might be a mov from const + * that could be cleaned up: + */ + reg->instr = instr_cp(reg->instr, NULL); + return; + } + + if (is_same_type_mov(reg->instr)) { + struct ir3_register *src_reg = reg->instr->regs[1]; + unsigned new_flags = src_reg->flags; + + combine_flags(&new_flags, reg->flags); + + if (!valid_flags(instr, n, new_flags)) + return; + + /* Here we handle the special case of mov from + * CONST and/or RELATIV. These need to be handled + * specially, because in the case of move from CONST + * there is no src ir3_instruction so we need to + * replace the ir3_register. And in the case of + * RELATIV we need to handle the address register + * dependency. + */ + if (src_reg->flags & IR3_REG_CONST) { + /* an instruction cannot reference two different + * address registers: + */ + if ((src_reg->flags & IR3_REG_RELATIV) && + conflicts(instr->address, reg->instr->address)) + return; + + src_reg->flags = new_flags; + instr->regs[n+1] = src_reg; + + if (src_reg->flags & IR3_REG_RELATIV) + instr->address = reg->instr->address; + + return; + } + + if ((src_reg->flags & IR3_REG_RELATIV) && + !conflicts(instr->address, reg->instr->address)) { + src_reg->flags = new_flags; + instr->regs[n+1] = src_reg; + instr->address = reg->instr->address; + + return; + } + + /* NOTE: seems we can only do immed integers, so don't + * need to care about float. But we do need to handle + * abs/neg *before* checking that the immediate requires + * few enough bits to encode: + * + * TODO: do we need to do something to avoid accidentally + * catching a float immed? + */ + if (src_reg->flags & IR3_REG_IMMED) { + int32_t iim_val = src_reg->iim_val; + + debug_assert((instr->category == 6) || + ((instr->category == 2) && + ir3_cat2_immed(instr->opc))); + + if (new_flags & IR3_REG_SABS) + iim_val = abs(iim_val); + + if (new_flags & IR3_REG_SNEG) + iim_val = -iim_val; + + if (new_flags & IR3_REG_BNOT) + iim_val = ~iim_val; + + if (!(iim_val & ~0x3ff)) { + new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); + src_reg->flags = new_flags; + src_reg->iim_val = iim_val; + instr->regs[n+1] = src_reg; + } + + return; + } + } +} + +/** + * Given an SSA src (instruction), return the one with extraneous + * mov's removed, ie, for (to copy NIR syntax): + * + * vec1 ssa1 = fadd , + * vec1 ssa2 = fabs ssa1 + * vec1 ssa3 = fneg ssa1 + * + * then calling instr_cp(ssa3, &flags) would return ssa1 with + * (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL, + * then disallow eliminating copies which would require flag + * propagation (for example, we cannot propagate abs/neg into + * an output). + */ static struct ir3_instruction * -instr_cp(struct ir3_instruction *instr) +instr_cp(struct ir3_instruction *instr, unsigned *flags) { + struct ir3_register *reg; + /* stay within the block.. don't try to operate across * basic block boundaries or we'll have problems when * dealing with multiple basic blocks: @@ -76,9 +354,12 @@ instr_cp(struct ir3_instruction *instr) if (is_meta(instr) && (instr->opc == OPC_META_INPUT)) return instr; - if (is_eligible_mov(instr)) { - struct ir3_instruction *src_instr = ssa(instr->regs[1]); - return instr_cp(src_instr); + if (is_eligible_mov(instr, !!flags)) { + struct ir3_register *reg = instr->regs[1]; + struct ir3_instruction *src_instr = ssa(reg); + if (flags) + combine_flags(flags, reg->flags); + return instr_cp(src_instr, flags); } /* Check termination condition before walking children (rather @@ -89,18 +370,20 @@ instr_cp(struct ir3_instruction *instr) * up as a src, we only need to recursively walk the children * once.) */ - if (!ir3_instr_check_mark(instr)) { - struct ir3_register *reg; + if (ir3_instr_check_mark(instr)) + return instr; - /* walk down the graph from each src: */ - foreach_src(reg, instr) - if (reg->flags & IR3_REG_SSA) - reg->instr = instr_cp(reg->instr); + /* walk down the graph from each src: */ + foreach_src_n(reg, n, instr) { + if (!(reg->flags & IR3_REG_SSA)) + continue; - if (instr->address) - instr->address = instr_cp(instr->address); + reg_cp(instr, reg, n); } + if (instr->address) + instr->address = instr_cp(instr->address, NULL); + return instr; } @@ -111,7 +394,7 @@ static void block_cp(struct ir3_block *block) for (i = 0; i < block->noutputs; i++) { if (block->outputs[i]) { struct ir3_instruction *out = - instr_cp(block->outputs[i]); + instr_cp(block->outputs[i], NULL); block->outputs[i] = out; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_group.c b/src/gallium/drivers/freedreno/ir3/ir3_group.c index d48ecc3..782f6e8 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_group.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c @@ -118,12 +118,6 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr) static struct group_ops instr_ops = { instr_get, instr_insert_mov }; - -static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b) -{ - return (a && b) && (a != b); -} - static void group_n(struct group_ops *ops, void *arr, unsigned n) { unsigned i, j; -- 2.7.4