From f241bd3749cec55ca5fac9cb24f17553ab31c0e1 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 2 Sep 2021 17:04:29 +0100 Subject: [PATCH] aco: don't coalesce constant copies into non-power-of-two sizes MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Cc: mesa-stable Part-of: --- src/amd/compiler/aco_lower_to_hw_instr.cpp | 4 +++- src/amd/compiler/tests/test_to_hw_instr.cpp | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 5494f7a..e52dba8 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1425,7 +1425,9 @@ try_coalesce_copies(lower_context* ctx, std::map& copy_ if (copy.op.isConstant()) { uint64_t val = copy.op.constantValue64() | (other->second.op.constantValue64() << (copy.bytes * 8u)); - if (!Operand::is_constant_representable(val, copy.bytes + other->second.bytes, true, + if (!util_is_power_of_two_or_zero(new_size)) + return; + if (!Operand::is_constant_representable(val, new_size, true, copy.def.regClass().type() == RegType::vgpr)) return; copy.op = Operand::get_const(ctx->program->chip_class, val, new_size); diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 18deaca..853d407 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -367,6 +367,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant) PhysReg v0_lo{256}; PhysReg v0_hi{256}; PhysReg v0_b1{256}; + PhysReg v1_lo{257}; PhysReg v1_hi{257}; v0_hi.reg_b += 2; v0_b1.reg_b += 1; @@ -455,6 +456,14 @@ BEGIN_TEST(to_hw_instr.subdword_constant) bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42)); + /* 32-bit and 8-bit copy */ + //! p_unit_test 12 + //! v1: %_:v[0] = v_mov_b32 0 + //! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b), + Operand::zero(), Operand::zero(1)); + //! s_endpgm finish_to_hw_instr_test(); -- 2.7.4