From 8bd7e2392b2044ae2cf9238de20cd48c1b71b283 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 1 Sep 2021 15:54:35 +0200 Subject: [PATCH] aco: preserve subdword RC when lowering p_insert/p_extract Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 27 ++++++--------------------- src/amd/compiler/tests/test_to_hw_instr.cpp | 16 ++++++++-------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 8de0c5c..5494f7a 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2089,15 +2089,8 @@ lower_to_hw_instr(Program* program) Operand::c32(offset), Operand::c32(bits)); } } else if (dst.regClass() == v2b) { - aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>( - aco_opcode::v_mov_b32, - (Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)}; - sdwa->operands[0] = Operand(op.physReg().advance(-op.physReg().byte()), - RegClass::get(op.regClass().type(), 4)); - sdwa->definitions[0] = dst; - sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext); - sdwa->dst_sel = SubdwordSel::uword; - bld.insert(std::move(sdwa)); + bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().sel[0] = + SubdwordSel(1, offset / 8, signext); } break; } @@ -2132,14 +2125,8 @@ lower_to_hw_instr(Program* program) bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits)); } else if (program->chip_class >= GFX9 || (op.regClass() != s1 && program->chip_class >= GFX8)) { - aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>( - aco_opcode::v_mov_b32, - (Format)((uint16_t)Format::VOP1 | (uint16_t)Format::SDWA), 1, 1)}; - sdwa->operands[0] = op; - sdwa->definitions[0] = dst; - sdwa->sel[0] = SubdwordSel::dword; - sdwa->dst_sel = SubdwordSel(bits / 8, offset / 8, false); - bld.insert(std::move(sdwa)); + bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op).instr->sdwa().dst_sel = 
SubdwordSel(bits / 8, offset / 8, false); } else { bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits)); bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), @@ -2147,11 +2134,9 @@ lower_to_hw_instr(Program* program) } } else { assert(dst.regClass() == v2b); - Operand sdwa_op = Operand(op.physReg().advance(-op.physReg().byte()), - RegClass::get(op.regClass().type(), 4)); - bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), sdwa_op) + bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op) .instr->sdwa() - .sel[1] = SubdwordSel(1, op.physReg().byte(), false); + .sel[1] = SubdwordSel::ubyte; } break; } diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 1d2ce8c..18deaca 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -554,15 +554,15 @@ BEGIN_TEST(to_hw_instr.extract) //>> p_unit_test 4 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8 - //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(0) + //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0) EXT(0, 0) - //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(2) + //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2) if (i != GFX7) EXT(0, 2) //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8 - //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(1) + //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1) EXT(1, 0) - //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1] dst_sel:uword0 dst_preserve src0_sel:@byte(3) + //~gfx[^7].*! 
v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3) if (i != GFX7) EXT(1, 2) @@ -640,15 +640,15 @@ BEGIN_TEST(to_hw_instr.insert) //>> p_unit_test 2 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8 - //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 + //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 INS(0, 0) - //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 + //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 0, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 if (i != GFX7) INS(0, 2) //~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16] - //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 + //~gfx[^7]! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 INS(1, 0) - //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 + //~gfx[^7]! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0 if (i != GFX7) INS(1, 2) -- 2.7.4