From 69f8daca16607a891842d4517542719759ba22dd Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 29 May 2023 13:05:56 -0400 Subject: [PATCH] pan/mdg: Ingest new-style registers Switch to register intrinsics, using the helpers. Since our backend copyprop chokes on non-SSA moves, we get better coalescing with this approach, hence the small improvements to instruction count / cycle count in shader-db. Changes to register pressure seem to be noise from iteration order. I'm not too worried. total instructions in shared programs: 1508444 -> 1508193 (-0.02%) instructions in affected programs: 42581 -> 42330 (-0.59%) helped: 482 HURT: 41 Inconclusive result (value mean confidence interval includes 0). total bundles in shared programs: 643023 -> 643136 (0.02%) bundles in affected programs: 16318 -> 16431 (0.69%) helped: 230 HURT: 85 Inconclusive result (value mean confidence interval includes 0). total quadwords in shared programs: 1125992 -> 1125600 (-0.03%) quadwords in affected programs: 125366 -> 124974 (-0.31%) helped: 507 HURT: 351 Quadwords are helped. total registers in shared programs: 90632 -> 90554 (-0.09%) registers in affected programs: 669 -> 591 (-11.66%) helped: 114 HURT: 31 Registers are helped. total threads in shared programs: 55607 -> 55600 (-0.01%) threads in affected programs: 20 -> 13 (-35.00%) helped: 1 HURT: 7 Inconclusive result (value mean confidence interval includes 0). total spills in shared programs: 1371 -> 1437 (4.81%) spills in affected programs: 44 -> 110 (150.00%) helped: 0 HURT: 2 total fills in shared programs: 5133 -> 5273 (2.73%) fills in affected programs: 84 -> 224 (166.67%) helped: 0 HURT: 2 Signed-off-by: Alyssa Rosenzweig Reviewed-by: Faith Ekstrand Part-of: --- src/panfrost/midgard/compiler.h | 40 ++++++++++++++++++-------- src/panfrost/midgard/midgard_compile.c | 45 ++++++++++++++++++++---------- src/panfrost/midgard/midgard_derivatives.c | 9 +----- 3 files changed, 60 insertions(+), 34 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 2272933..1a362bd 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -456,7 +456,7 @@ make_compiler_temp(compiler_context *ctx) static inline unsigned make_compiler_temp_reg(compiler_context *ctx) { - return ((ctx->func->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG; + return ((ctx->func->impl->ssa_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG; } static bool @@ -472,27 +472,43 @@ nir_ssa_index(nir_ssa_def *ssa) } static inline unsigned +nir_reg_index(nir_ssa_def *handle) +{ + return (handle->index << 1) | PAN_IS_REG; +} + +static inline unsigned nir_src_index(compiler_context *ctx, nir_src *src) { - if (src->is_ssa) + nir_intrinsic_instr *load = nir_load_reg_for_def(src->ssa); + + if (load) + return nir_reg_index(load->src[0].ssa); + else return nir_ssa_index(src->ssa); - else { - assert(!src->reg.indirect); - return (src->reg.reg->index << 1) | PAN_IS_REG; - } } static inline unsigned -nir_dest_index(nir_dest *dst) +nir_dest_index_with_mask(nir_dest *dest, uint16_t *write_mask) { - if (dst->is_ssa) - return (dst->ssa.index << 1) | 0; - else { - assert(!dst->reg.indirect); - return (dst->reg.reg->index << 1) | PAN_IS_REG; + nir_intrinsic_instr *store = nir_store_reg_for_def(&dest->ssa); + + if (store) { + *write_mask = nir_intrinsic_write_mask(store); + return nir_reg_index(store->src[1].ssa); + } else { + *write_mask = (uint16_t)BITFIELD_MASK(nir_dest_num_components(*dest)); + return nir_ssa_index(&dest->ssa); } } +static inline unsigned +nir_dest_index(nir_dest *dest) +{ + uint16_t write_mask = 0; + return nir_dest_index_with_mask(dest, &write_mask); +} + /* MIR manipulation */ void mir_rewrite_index(compiler_context *ctx, unsigned old, unsigned new); diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index 13bfb75..9473b60 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -480,13 +480,14 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend) NIR_PASS_V(nir, nir_opt_move, move_all); /* Take us out of SSA */ - NIR_PASS(progress, nir, nir_convert_from_ssa, true, false); + NIR_PASS(progress, nir, nir_convert_from_ssa, true, true); /* We are a vector architecture; write combine where possible */ NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest); - NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL); + NIR_PASS(progress, nir, nir_lower_vec_to_regs, NULL, NULL); NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS_V(nir, nir_trivialize_registers); } /* Do not actually emit a load; instead, cache the constant for inlining */ @@ -634,8 +635,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) return; } - bool is_ssa = dest->is_ssa; - unsigned nr_components = nir_dest_num_components(*dest); unsigned nr_inputs = nir_op_infos[instr->op].num_inputs; unsigned op = 0; @@ -875,12 +874,13 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) midgard_instruction ins = { .type = TAG_ALU_4, - .dest = nir_dest_index(dest), .dest_type = nir_op_infos[instr->op].output_type | nir_dest_bit_size(*dest), .roundmode = roundmode, }; + ins.dest = nir_dest_index_with_mask(dest, &ins.mask); + for (unsigned i = nr_inputs; i < ARRAY_SIZE(ins.src); ++i) ins.src[i] = ~0; @@ -920,15 +920,6 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr) ins.src_abs[1] = true; } - ins.mask = mask_of(nr_components); - - /* Apply writemask if non-SSA, keeping in mind that we can't write to - * components that don't exist. Note modifier => SSA => !reg => no - * writemask, so we don't have to worry about writemasks here.*/ - - if (!is_ssa) - ins.mask &= instr->dest.write_mask; - ins.op = op; ins.outmod = outmod; @@ -1550,6 +1541,32 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) unsigned offset = 0, reg; switch (instr->intrinsic) { + case nir_intrinsic_decl_reg: + case nir_intrinsic_store_reg: + /* Always fully consumed */ + break; + + case nir_intrinsic_load_reg: { + /* NIR guarantees that, for typical isel, this will always be fully + * consumed. However, we also do our own nir_ssa_scalar chasing for + * address arithmetic, bypassing the source chasing helpers. So we can end + * up with unconsumed load_register instructions. Translate them here. 99% + * of the time, these moves will be DCE'd away. + */ + assert(instr->src[0].is_ssa); + nir_ssa_def *handle = instr->src[0].ssa; + + midgard_instruction ins = + v_mov(nir_reg_index(handle), nir_dest_index(&instr->dest)); + + ins.dest_type = ins.src_types[1] = + nir_type_uint | nir_dest_bit_size(instr->dest); + + ins.mask = BITFIELD_MASK(nir_dest_num_components(instr->dest)); + emit_mir_instruction(ctx, ins); + break; + } + case nir_intrinsic_discard_if: case nir_intrinsic_discard: { bool conditional = instr->intrinsic == nir_intrinsic_discard_if; diff --git a/src/panfrost/midgard/midgard_derivatives.c b/src/panfrost/midgard/midgard_derivatives.c index d243a00..56ae01a 100644 --- a/src/panfrost/midgard/midgard_derivatives.c +++ b/src/panfrost/midgard/midgard_derivatives.c @@ -95,13 +95,8 @@ void midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr) { /* Create texture instructions */ - - unsigned nr_components = nir_dest_num_components(instr->dest.dest); - midgard_instruction ins = { .type = TAG_TEXTURE_4, - .mask = mask_of(nr_components), - .dest = nir_dest_index(&instr->dest.dest), .dest_type = nir_type_float32, .src = { @@ -127,9 +122,7 @@ midgard_emit_derivatives(compiler_context *ctx, nir_alu_instr *instr) }, }; - if (!instr->dest.dest.is_ssa) - ins.mask &= instr->dest.write_mask; - + ins.dest = nir_dest_index_with_mask(&instr->dest.dest, &ins.mask); emit_mir_instruction(ctx, ins); } -- 2.7.4