From: Rhys Perry
Date: Mon, 7 Sep 2020 19:44:54 +0000 (+0100)
Subject: aco: fix value numbering of reductions
X-Git-Tag: upstream/21.0.0~5509
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=834b449a46716d64bd2cee99a029cdc48813cc9a;p=platform%2Fupstream%2Fmesa.git

aco: fix value numbering of reductions

Non-ssa definitions caused an assertion in value numbering.

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 9c17733..ba1e3dd 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7046,6 +7046,54 @@ void emit_uniform_subgroup(isel_context *ctx, nir_intrinsic_instr *instr, Temp s
    }
 }

+Pseudo_reduction_instruction *create_reduction_instr(isel_context *ctx, aco_opcode aco_op, ReduceOp op, Definition dst, Temp src)
+{
+   assert(src.bytes() <= 8);
+   Builder bld(ctx->program, ctx->block);
+
+   unsigned num_defs = 0;
+   Definition defs[5];
+   defs[num_defs++] = dst;
+   defs[num_defs++] = bld.def(bld.lm); /* used internally to save/restore exec */
+
+   /* scalar identity temporary */
+   bool need_sitmp = (ctx->program->chip_class <= GFX7 || ctx->program->chip_class >= GFX10) && aco_op != aco_opcode::p_reduce;
+   if (aco_op == aco_opcode::p_exclusive_scan) {
+      need_sitmp |=
+         (op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
+          op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
+          op == fmin16 || op == fmin32 || op == fmin64 ||
+          op == fmax16 || op == fmax32 || op == fmax64 ||
+          op == fmul16 || op == fmul64);
+   }
+   if (need_sitmp)
+      defs[num_defs++] = bld.def(RegType::sgpr, dst.size());
+
+   /* scc clobber */
+   defs[num_defs++] = bld.def(s1, scc);
+
+   /* vcc clobber */
+   bool clobber_vcc = false;
+   if ((op == iadd32 || op == imul64) && ctx->program->chip_class < GFX9)
+      clobber_vcc = true;
+   if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
+      clobber_vcc = true;
+
+   if (clobber_vcc)
+      defs[num_defs++] = bld.def(bld.lm, vcc);
+
+   Pseudo_reduction_instruction *reduce = create_instruction<Pseudo_reduction_instruction>(aco_op, Format::PSEUDO_REDUCTION, 3, num_defs);
+   reduce->operands[0] = Operand(src);
+   /* setup_reduce_temp will update these undef operands if needed */
+   reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
+   reduce->operands[2] = Operand(v1.as_linear());
+   std::copy(defs, defs + num_defs, reduce->definitions.begin());
+
+   reduce->reduce_op = op;
+
+   return reduce;
+}
+
 void emit_interp_center(isel_context *ctx, Temp dst, Temp pos1, Temp pos2)
 {
    Builder bld(ctx->program, ctx->block);
@@ -7661,21 +7709,10 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
                unreachable("unknown reduce intrinsic");
          }

-         aco_ptr<Pseudo_reduction_instruction> reduce{create_instruction<Pseudo_reduction_instruction>(aco_op, Format::PSEUDO_REDUCTION, 3, 5)};
-         reduce->operands[0] = Operand(src);
-         // filled in by aco_reduce_assign.cpp, used internally as part of the
-         // reduce sequence
-         assert(dst.size() == 1 || dst.size() == 2);
-         reduce->operands[1] = Operand(RegClass(RegType::vgpr, dst.size()).as_linear());
-         reduce->operands[2] = Operand(v1.as_linear());
-
          Temp tmp_dst = bld.tmp(dst.regClass());
-         reduce->definitions[0] = Definition(tmp_dst);
-         reduce->definitions[1] = bld.def(ctx->program->lane_mask); // used internally
-         reduce->definitions[2] = Definition();
-         reduce->definitions[3] = Definition(scc, s1);
-         reduce->definitions[4] = Definition();
-         reduce->reduce_op = reduce_op;
+         aco_ptr<Pseudo_reduction_instruction> reduce{
+            create_reduction_instr(ctx, aco_op, reduce_op, Definition(tmp_dst), src)};
+
          reduce->cluster_size = cluster_size;
          ctx->block->instructions.emplace_back(std::move(reduce));

diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index 7bf7a6c..301fe80 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -152,34 +152,6 @@ void setup_reduce_temp(Program* program)
          instr->operands[1] = Operand(reduceTmp);
          if (need_vtmp)
             instr->operands[2] = Operand(vtmp);
-
-         /* scalar temporary */
-         Builder bld(program);
-         instr->definitions[1] = bld.def(s2);
-
-         /* scalar identity temporary */
-         bool need_sitmp = (program->chip_class <= GFX7 || program->chip_class >= GFX10) && instr->opcode != aco_opcode::p_reduce;
-         if (instr->opcode == aco_opcode::p_exclusive_scan) {
-            need_sitmp |=
-               (op == imin8 || op == imin16 || op == imin32 || op == imin64 ||
-                op == imax8 || op == imax16 || op == imax32 || op == imax64 ||
-                op == fmin16 || op == fmin32 || op == fmin64 ||
-                op == fmax16 || op == fmax32 || op == fmax64 ||
-                op == fmul16 || op == fmul64);
-         }
-         if (need_sitmp) {
-            instr->definitions[2] = bld.def(RegClass(RegType::sgpr, instr->operands[0].size()));
-         }
-
-         /* vcc clobber */
-         bool clobber_vcc = false;
-         if ((op == iadd32 || op == imul64) && program->chip_class < GFX9)
-            clobber_vcc = true;
-         if (op == iadd64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)
-            clobber_vcc = true;
-
-         if (clobber_vcc)
-            instr->definitions[4] = Definition(vcc, bld.lm);
       }
    }
 }
diff --git a/src/amd/compiler/aco_util.h b/src/amd/compiler/aco_util.h
index b04eaba..7728747 100644
--- a/src/amd/compiler/aco_util.h
+++ b/src/amd/compiler/aco_util.h
@@ -216,6 +216,12 @@ public:
       --length;
    }

+   /*! \brief Adds an element to the end of the span
+   */
+   constexpr void push_back(const_reference val) noexcept {
+      *std::next(begin(), length++) = val;
+   }
+
    /*! \brief Clears the span
    */
    constexpr void clear() noexcept {