From: Daniel Schürmann Date: Thu, 9 Mar 2023 19:50:06 +0000 (+0100) Subject: aco: create hw_init_scratch() function for p_init_scratch lowering X-Git-Tag: upstream/23.3.3~11573 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7d35bf24f6e8f8da37ba9a532539deb0faa95cff;p=platform%2Fupstream%2Fmesa.git aco: create hw_init_scratch() function for p_init_scratch lowering Part-of: --- diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py index 9ab0a2a..9a23867 100644 --- a/src/amd/compiler/aco_builder_h.py +++ b/src/amd/compiler/aco_builder_h.py @@ -639,6 +639,8 @@ formats = [(f if len(f) == 5 else f + ('',)) for f in formats] % endfor }; +void hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset); + } // namespace aco #endif /* _ACO_BUILDER_ */""" diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 4bba62f..80db136 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2116,6 +2116,37 @@ emit_set_mode_from_block(Builder& bld, Program& program, Block* block, bool alwa } void +hw_init_scratch(Builder& bld, Definition def, Operand scratch_addr, Operand scratch_offset) +{ + /* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16 + * bits in the same instruction that we add the carry. + */ + Operand hi_add = Operand::c32(0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1)); + Operand scratch_addr_lo(scratch_addr.physReg(), s1); + Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1); + + if (bld.program->gfx_level >= GFX10) { + PhysReg scratch_lo = def.physReg(); + PhysReg scratch_hi = def.physReg().advance(4); + + bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo, s1), Definition(scc, s1), + scratch_addr_lo, scratch_offset); + bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi, s1), Definition(scc, s1), + scratch_addr_hi, hi_add, Operand(scc, s1)); + + /* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register + * 20/21) */ + bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_lo, s1), (31 << 11) | 20); + bld.sopk(aco_opcode::s_setreg_b32, Operand(scratch_hi, s1), (31 << 11) | 21); + } else { + bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1), + scratch_addr_lo, scratch_offset); + bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1), + scratch_addr_hi, hi_add, Operand(scc, s1)); + } +} + +void lower_to_hw_instr(Program* program) { Block* discard_block = NULL; @@ -2466,39 +2497,13 @@ lower_to_hw_instr(Program* program) break; Operand scratch_addr = instr->operands[0]; - Operand scratch_addr_lo(scratch_addr.physReg(), s1); if (program->stage.hw != HWStage::CS) { bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr, Operand::zero()); - scratch_addr_lo.setFixed(instr->definitions[0].physReg()); - } - Operand scratch_addr_hi(scratch_addr_lo.physReg().advance(4), s1); - - /* Since we know what the high 16 bits of scratch_hi is, we can set all the high 16 - * bits in the same instruction that we add the carry. - */ - uint32_t hi_add = 0xffff0000 - S_008F04_SWIZZLE_ENABLE_GFX6(1); - - if (program->gfx_level >= GFX10) { - Operand scratch_lo(instr->definitions[0].physReg(), s1); - Operand scratch_hi(instr->definitions[0].physReg().advance(4), s1); - - bld.sop2(aco_opcode::s_add_u32, Definition(scratch_lo.physReg(), s1), - Definition(scc, s1), scratch_addr_lo, instr->operands[1]); - bld.sop2(aco_opcode::s_addc_u32, Definition(scratch_hi.physReg(), s1), - Definition(scc, s1), scratch_addr_hi, Operand::c32(hi_add), - Operand(scc, s1)); - - /* "((size - 1) << 11) | register" (FLAT_SCRATCH_LO/HI is encoded as register - * 20/21) */ - bld.sopk(aco_opcode::s_setreg_b32, scratch_lo, (31 << 11) | 20); - bld.sopk(aco_opcode::s_setreg_b32, scratch_hi, (31 << 11) | 21); - } else { - bld.sop2(aco_opcode::s_add_u32, Definition(flat_scr_lo, s1), Definition(scc, s1), - scratch_addr_lo, instr->operands[1]); - bld.sop2(aco_opcode::s_addc_u32, Definition(flat_scr_hi, s1), Definition(scc, s1), - scratch_addr_hi, Operand::c32(hi_add), Operand(scc, s1)); + scratch_addr.setFixed(instr->definitions[0].physReg()); } + + hw_init_scratch(bld, instr->definitions[0], scratch_addr, instr->operands[1]); break; } case aco_opcode::p_jump_to_epilog: {