aco: don't coalesce constant copies into non-power-of-two sizes
author Rhys Perry <pendingchaos02@gmail.com>
Thu, 2 Sep 2021 16:04:29 +0000 (17:04 +0100)
committer Rhys Perry <pendingchaos02@gmail.com>
Fri, 3 Sep 2021 13:01:27 +0000 (14:01 +0100)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12702>

src/amd/compiler/aco_lower_to_hw_instr.cpp
src/amd/compiler/tests/test_to_hw_instr.cpp

index 5494f7a..e52dba8 100644 (file)
@@ -1425,7 +1425,9 @@ try_coalesce_copies(lower_context* ctx, std::map<PhysReg, copy_operation>& copy_
    if (copy.op.isConstant()) {
       uint64_t val =
          copy.op.constantValue64() | (other->second.op.constantValue64() << (copy.bytes * 8u));
-      if (!Operand::is_constant_representable(val, copy.bytes + other->second.bytes, true,
+      if (!util_is_power_of_two_or_zero(new_size))
+         return;
+      if (!Operand::is_constant_representable(val, new_size, true,
                                               copy.def.regClass().type() == RegType::vgpr))
          return;
       copy.op = Operand::get_const(ctx->program->chip_class, val, new_size);
index 18deaca..853d407 100644 (file)
@@ -367,6 +367,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
    PhysReg v0_lo{256};
    PhysReg v0_hi{256};
    PhysReg v0_b1{256};
+   PhysReg v1_lo{257};
    PhysReg v1_hi{257};
    v0_hi.reg_b += 2;
    v0_b1.reg_b += 1;
@@ -455,6 +456,14 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
       bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));
 
+      /* 32-bit and 8-bit copy */
+      //! p_unit_test 12
+      //! v1: %_:v[0] = v_mov_b32 0
+      //! v1b: %_:v[1][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
+      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
+                 Operand::zero(), Operand::zero(1));
+
       //! s_endpgm
 
       finish_to_hw_instr_test();