From 3ac743c333e49c6c91e74073eb8ba01cc6762fc6 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Fri, 19 Feb 2021 12:14:14 +0100 Subject: [PATCH] ir3: Add pass to lower arrays to SSA This will be run right after nir->ir3. Even though we have SSA coming out of NIR, we still need it for NIR registers, even though we keep the original array around to insert false dependencies. Part-of: --- src/freedreno/ir3/ir3.h | 3 + src/freedreno/ir3/ir3_array_to_ssa.c | 299 +++++++++++++++++++++++++++++++++++ src/freedreno/ir3/meson.build | 1 + 3 files changed, 303 insertions(+) create mode 100644 src/freedreno/ir3/ir3_array_to_ssa.c diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 29a4da4..5cd364f 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -1467,6 +1467,9 @@ bool ir3_cf(struct ir3 *ir); bool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); bool ir3_cp_postsched(struct ir3 *ir); +/* Make arrays SSA */ +bool ir3_array_to_ssa(struct ir3 *ir); + /* group neighbors and insert mov's to resolve conflicts: */ bool ir3_group(struct ir3 *ir); diff --git a/src/freedreno/ir3/ir3_array_to_ssa.c b/src/freedreno/ir3/ir3_array_to_ssa.c new file mode 100644 index 0000000..53ea2d9 --- /dev/null +++ b/src/freedreno/ir3/ir3_array_to_ssa.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2021 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* This pass lowers array accesses to SSA. + * + * After this pass, instructions writing arrays implicitly read the contents of + * the array defined in instr->regs[0]->def (possibly a phi node), perform the + * operation, and store to instr->regs[0]. + * + * This makes arrays appear like "normal" SSA values, even if the false + * dependencies mean that they always stay in CSSA form (i.e. able to removed + * out-of-SSA with no copies.) While hopefully they shouldn't induce copies in + * most cases, we can't make that guarantee while also splitting spilling from + * RA and guaranteeing a certain number of registers are used, so we have to + * insert the phi nodes to be able to know when copying should happen. + * + * The implementation is based on the idea in "Simple and Efficient Construction + * of Static Single Assignment Form" of scanning backwards to find the + * definition. However, since we're not doing this on-the-fly we can simplify + * things a little by doing a pre-pass to get the last definition of each array + * in each block. Then we optimize trivial phis in a separate pass, "on the fly" + * so that we don't have to rewrite (and keep track of) users. + */ + +#include "ir3.h" +#include + +struct array_state { + struct ir3_register *live_in_definition; + struct ir3_register *live_out_definition; + bool constructed; + bool optimized; +}; + +struct array_ctx { + struct array_state *states; + struct ir3 *ir; + unsigned array_count; +}; + +static struct array_state * +get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id) +{ + return &ctx->states[ctx->array_count * block->index + id]; +} + +static struct ir3_register * +read_value_beginning(struct array_ctx *ctx, struct ir3_block *block, struct ir3_array *arr); + +static struct ir3_register * +read_value_end(struct array_ctx *ctx, struct ir3_block *block, struct ir3_array *arr) +{ + struct array_state *state = get_state(ctx, block, arr->id); + if (state->live_out_definition) + return state->live_out_definition; + + state->live_out_definition = read_value_beginning(ctx, block, arr); + return state->live_out_definition; +} + +/* Roughly equivalent to readValueRecursive from the paper: */ +static struct ir3_register * +read_value_beginning(struct array_ctx *ctx, struct ir3_block *block, struct ir3_array *arr) +{ + struct array_state *state = get_state(ctx, block, arr->id); + + if (state->constructed) + return state->live_in_definition; + + if (block->predecessors_count == 0) { + state->constructed = true; + return NULL; + } + + if (block->predecessors_count == 1) { + state->live_in_definition = read_value_end(ctx, block->predecessors[0], arr); + state->constructed = true; + return state->live_in_definition; + } + + unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0); + struct ir3_instruction *phi = + ir3_instr_create(block, OPC_META_PHI, block->predecessors_count + 1); + list_del(&phi->node); + list_add(&phi->node, &block->instr_list); + + struct ir3_register *dst = __ssa_dst(phi); + dst->flags |= flags; + dst->array.id = arr->id; + dst->size = arr->length; + + state->live_in_definition = phi->regs[0]; + state->constructed = true; + + for (unsigned i = 0; i < block->predecessors_count; i++) { + struct ir3_register *src = read_value_end(ctx, block->predecessors[i], arr); + struct ir3_register *src_reg; + if (src) { + src_reg = __ssa_src(phi, src->instr, flags); + } else { + src_reg = ir3_reg_create(phi, INVALID_REG, flags | IR3_REG_SSA); + } + src_reg->array.id = arr->id; + src_reg->size = arr->length; + } + return phi->regs[0]; +} + +static struct ir3_register * +remove_trivial_phi(struct ir3_instruction *phi) +{ + /* Break cycles */ + if (phi->data) + return phi->data; + + phi->data = phi->regs[0]; + + struct ir3_register *unique_def = NULL; + bool unique = true; + for (unsigned i = 0; i < phi->block->predecessors_count; i++) { + struct ir3_register *src = phi->regs[i + 1]; + + /* If there are any undef sources, then the remaining sources may not + * dominate the phi node, even if they are all equal. So we need to + * bail out in this case. + * + * This seems to be a bug in the original paper. + */ + if (!src->def) { + unique = false; + break; + } + + struct ir3_instruction *src_instr = src->def->instr; + + /* phi sources which point to the phi itself don't count for + * figuring out if the phi is trivial + */ + if (src_instr == phi) + continue; + + if (src_instr->opc == OPC_META_PHI) { + src->def = remove_trivial_phi(src->def->instr); + } + + if (unique_def) { + if (unique_def != src->def) { + unique = false; + break; + } + } else { + unique_def = src->def; + } + } + + if (unique) { + phi->data = unique_def; + return unique_def; + } else { + return phi->regs[0]; + } +} + +static struct ir3_register * +lookup_value(struct ir3_register *reg) +{ + if (reg->instr->opc == OPC_META_PHI) + return reg->instr->data; + return reg; +} + +static struct ir3_register * +lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id) +{ + struct array_state *state = get_state(ctx, block, id); + if (state->live_in_definition) + return lookup_value(state->live_in_definition); + + return NULL; +} + +bool +ir3_array_to_ssa(struct ir3 *ir) +{ + struct array_ctx ctx = {}; + + foreach_array (array, &ir->array_list) { + ctx.array_count = MAX2(ctx.array_count, array->id + 1); + } + + if (ctx.array_count == 0) + return false; + + unsigned i = 0; + foreach_block (block, &ir->block_list) { + block->index = i++; + } + + ctx.ir = ir; + ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state)); + + foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + for (unsigned i = 0; i < instr->regs_count; i++) { + if ((instr->regs[i]->flags & IR3_REG_ARRAY) && + (instr->regs[i]->flags & IR3_REG_DEST)) { + struct array_state *state = + get_state(&ctx, block, instr->regs[i]->array.id); + state->live_out_definition = instr->regs[i]; + } + } + } + } + + foreach_block (block, &ir->block_list) { + foreach_instr (instr, &block->instr_list) { + if (instr->opc == OPC_META_PHI) + continue; + + for (unsigned i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + if ((reg->flags & IR3_REG_ARRAY) && + (!reg->def || reg->def->instr->block != block)) { + reg->def = NULL; + struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id); + + /* Construct any phi nodes necessary to read this value */ + read_value_beginning(&ctx, block, arr); + } + } + } + } + + foreach_block (block, &ir->block_list) { + foreach_instr_safe (instr, &block->instr_list) { + if (instr->opc == OPC_META_PHI) + remove_trivial_phi(instr); + else + break; + } + } + + foreach_block (block, &ir->block_list) { + foreach_instr_safe (instr, &block->instr_list) { + if (instr->opc == OPC_META_PHI) { + if (!(instr->flags & IR3_REG_ARRAY)) + continue; + if (instr->data != instr->regs[0]) { + list_del(&instr->node); + continue; + } + for (unsigned i = 1; i < instr->regs_count; i++) { + instr->regs[i] = lookup_value(instr->regs[i]); + } + } else { + for (unsigned i = 0; i < instr->regs_count; i++) { + struct ir3_register *reg = instr->regs[i]; + if ((reg->flags & IR3_REG_ARRAY)) { + if (!reg->def) { + /* It is assumed that before calling + * ir3_array_to_ssa(), reg->def was set to the + * previous writer of the array within the current + * block (if any). The reg->def of the first write + * to an array within a block was cleared in the + * loop calling read_value_beginning() above. + */ + reg->def = lookup_live_in(&ctx, block, reg->array.id); + } + reg->flags |= IR3_REG_SSA; + } + } + } + } + } + + free(ctx.states); + return true; +} + diff --git a/src/freedreno/ir3/meson.build b/src/freedreno/ir3/meson.build index 38e04f6..b3799f1 100644 --- a/src/freedreno/ir3/meson.build +++ b/src/freedreno/ir3/meson.build @@ -66,6 +66,7 @@ libfreedreno_ir3_files = files( 'ir3.c', 'ir3_a4xx.c', 'ir3_a6xx.c', + 'ir3_array_to_ssa.c', 'ir3_assembler.c', 'ir3_assembler.h', 'ir3_compiler_nir.c', -- 2.7.4