From: Alyssa Rosenzweig
Date: Wed, 12 Jul 2023 12:17:52 +0000 (-0400)
Subject: nir/legacy: Fix fneg(load_reg) case
X-Git-Tag: upstream/23.3.3~5650
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=34fcf6d479baafbff5e41a1e50993b8ce581bd50;p=platform%2Fupstream%2Fmesa.git

nir/legacy: Fix fneg(load_reg) case

Consider the IR:

   %0 = load_reg
   %1 = fneg %0
   %2 = ffloor %1
   %3 = bcsel .., .., %1

Because the fneg has both foldable and non-foldable users, nir/legacy
does not fold the fneg into the load_reg. This ensures that the backend
correctly emits a dedicated fneg instruction (with the load_reg folded
in) for the bcsel to use. However, because the chasing helpers did not
previously take other uses of a modifier into account, they would fuse
the fneg into the ffloor. Except that doesn't work, because the
load_reg instruction is supposed to be eliminated. So we end up with
broken chased IR:

   1 = fneg r0
   2 = ffloor -NULL
   3 = bcsel, ..., 1

The fix is easy: only fold a modifier into an ALU instruction if the
modifier can be folded away entirely. If we can't eliminate the
modifier instruction altogether, folding it is not necessarily
beneficial from a register pressure perspective anyway, so this is
probably ok.

With that check in place, we get correct IR:

   1 = fneg r0
   2 = ffloor 1
   3 = bcsel, ..., 1
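(As a sketch of the rule this enforces, the fold is all-or-nothing: a
float modifier may be fused into a use only if *every* use can absorb
it, so the modifier instruction itself becomes dead. The condensation
below is illustrative, not code from the patch; use_accepts_float_mod()
is a hypothetical stand-in for the per-use is_if / non-ALU /
non-float-input checks done inline in the real helper.)

   /* Illustrative all-or-nothing fold rule: one non-foldable use (like
    * the bcsel above) poisons the fold for all uses, keeping the
    * dedicated fneg alive and its source register valid.
    */
   static bool
   mod_folds_into_all_uses(nir_alu_instr *mod)
   {
      nir_foreach_use_including_if(use, &mod->dest.dest.ssa) {
         if (!use_accepts_float_mod(use))
            return false;
      }

      return true;
   }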
Fixes carchase/230.shader_test under softpipe.

Signed-off-by: Alyssa Rosenzweig
Reviewed-by: Faith Ekstrand
Part-of: 
---

diff --git a/src/compiler/nir/nir_legacy.c b/src/compiler/nir/nir_legacy.c
index 10bf323..fe47735 100644
--- a/src/compiler/nir/nir_legacy.c
+++ b/src/compiler/nir/nir_legacy.c
@@ -8,54 +8,6 @@
 #include "nir_builder.h"
 #include "nir_legacy.h"
 
-static inline bool
-chase_source_mod(nir_ssa_def **ssa, nir_op op, uint8_t *swizzle)
-{
-   if ((*ssa)->parent_instr->type != nir_instr_type_alu)
-      return false;
-
-   nir_alu_instr *alu = nir_instr_as_alu((*ssa)->parent_instr);
-   if (alu->op != op)
-      return false;
-
-   /* This only works for unary ops */
-   assert(nir_op_infos[op].num_inputs == 1);
-
-   /* To fuse the source mod in, we need to compose the swizzles and string
-    * through the source.
-    */
-   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
-      swizzle[i] = alu->src[0].swizzle[swizzle[i]];
-
-   assert(alu->src[0].src.is_ssa && "registers lowered to intrinsics");
-   *ssa = alu->src[0].src.ssa;
-   return true;
-}
-
-static inline bool
-accepts_source_mod(const nir_src *src)
-{
-   /* No legacy user supports fp64 modifiers */
-   if (nir_src_bit_size(*src) == 64)
-      return false;
-
-   if (src->is_if)
-      return false;
-
-   nir_instr *parent = src->parent_instr;
-   if (parent->type != nir_instr_type_alu)
-      return false;
-
-   nir_alu_instr *alu = nir_instr_as_alu(parent);
-   nir_alu_src *alu_src = list_entry(src, nir_alu_src, src);
-   unsigned src_index = alu_src - alu->src;
-
-   assert(src_index < nir_op_infos[alu->op].num_inputs);
-   nir_alu_type src_type = nir_op_infos[alu->op].input_types[src_index];
-
-   return nir_alu_type_get_base_type(src_type) == nir_type_float;
-}
-
 bool
 nir_legacy_float_mod_folds(nir_alu_instr *mod)
 {
@@ -66,8 +18,22 @@ nir_legacy_float_mod_folds(nir_alu_instr *mod)
    if (mod->dest.dest.ssa.bit_size == 64)
       return false;
 
-   nir_foreach_use_including_if(use, &mod->dest.dest.ssa) {
-      if (!accepts_source_mod(use))
+   nir_foreach_use_including_if(src, &mod->dest.dest.ssa) {
+      if (src->is_if)
+         return false;
+
+      nir_instr *parent = src->parent_instr;
+      if (parent->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *alu = nir_instr_as_alu(parent);
+      nir_alu_src *alu_src = list_entry(src, nir_alu_src, src);
+      unsigned src_index = alu_src - alu->src;
+
+      assert(src_index < nir_op_infos[alu->op].num_inputs);
+      nir_alu_type src_type = nir_op_infos[alu->op].input_types[src_index];
+
+      if (nir_alu_type_get_base_type(src_type) != nir_type_float)
          return false;
    }
 
@@ -101,6 +67,37 @@ chase_alu_src_helper(const nir_src *src)
    }
 }
 
+static inline bool
+chase_source_mod(nir_ssa_def **ssa, nir_op op, uint8_t *swizzle)
+{
+   if ((*ssa)->parent_instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu((*ssa)->parent_instr);
+   if (alu->op != op)
+      return false;
+
+   /* If there are other uses of the modifier that don't fold, we can't fold
+    * it here either, in case it's reading from a load_reg that won't be
+    * emitted.
+    */
+   if (!nir_legacy_float_mod_folds(alu))
+      return false;
+
+   /* This only works for unary ops */
+   assert(nir_op_infos[op].num_inputs == 1);
+
+   /* To fuse the source mod in, we need to compose the swizzles and string
+    * through the source.
+    */
+   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
+      swizzle[i] = alu->src[0].swizzle[swizzle[i]];
+
+   assert(alu->src[0].src.is_ssa && "registers lowered to intrinsics");
+   *ssa = alu->src[0].src.ssa;
+   return true;
+}
+
 nir_legacy_alu_src
 nir_legacy_chase_alu_src(const nir_alu_src *src, bool fuse_fabs)
 {
@@ -120,11 +117,9 @@ nir_legacy_chase_alu_src(const nir_alu_src *src, bool fuse_fabs)
     * fabs, since we chase from bottom-up. We don't handle fabs(fneg(x))
     * since nir_opt_algebraic should have eliminated that.
     */
-      if (accepts_source_mod(&src->src)) {
-         out.fneg = chase_source_mod(&out.src.ssa, nir_op_fneg, out.swizzle);
-         out.fabs = fuse_fabs && chase_source_mod(&out.src.ssa, nir_op_fabs,
-                                                  out.swizzle);
-      }
+      out.fneg = chase_source_mod(&out.src.ssa, nir_op_fneg, out.swizzle);
+      if (fuse_fabs)
+         out.fabs = chase_source_mod(&out.src.ssa, nir_op_fabs, out.swizzle);
 
       return out;
    } else {
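(For context, a backend consuming this helper might do something like
the following when translating an ALU source. This is a minimal,
hypothetical sketch, not code from any real driver: backend_src, the
neg/abs flags, and backend_src_from_legacy() are invented stand-ins;
only nir_legacy_chase_alu_src() and the nir_legacy_alu_src fields used
in the diff above are taken from the patch.)

   /* Fold fneg/fabs into the source where nir/legacy says it is safe;
    * otherwise the value arrives through the dedicated fneg/fabs
    * instruction that was kept alive for the non-foldable users.
    */
   static backend_src
   translate_alu_src(const nir_alu_src *nsrc)
   {
      nir_legacy_alu_src chased =
         nir_legacy_chase_alu_src(nsrc, true /* fuse_fabs */);

      backend_src src = backend_src_from_legacy(&chased.src);
      src.neg = chased.fneg; /* set only when the fold is known safe */
      src.abs = chased.fabs;
      /* chased.swizzle already composes the modifier's swizzle. */
      return src;
   }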