operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
RegClass rc)
{
- if (instr->operands[idx].isFixed())
- return instr->operands[idx].physReg() == reg;
-
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32_e64;
if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
}
void
-get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
- std::vector<std::pair<Operand, Definition>>& parallelcopy,
- aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
+handle_fixed_operands(ra_ctx& ctx, RegisterFile& register_file,
+ std::vector<std::pair<Operand, Definition>>& parallelcopy,
+ aco_ptr<Instruction>& instr) /* queues copies that move every fixed temp operand of instr into its fixed register */
{
- /* check if the operand is fixed */
- PhysReg src = ctx.assignments[operand.tempId()].reg;
- PhysReg dst;
- if (operand.isFixed()) {
- assert(operand.physReg() != src);
+ assert(instr->operands.size() <= 64); /* mask below holds one bit per operand */
- /* check if target reg is blocked, and move away the blocking var */
- if (register_file.test(operand.physReg(), operand.bytes())) {
- PhysRegInterval target{operand.physReg(), operand.size()};
+ RegisterFile tmp_file(register_file); /* scratch copy that is cleared/blocked below; register_file itself is only passed to update_renames() */
- RegisterFile tmp_file(register_file);
+ uint64_t mask = 0; /* bit i set: a copy for operand i was added to parallelcopy */
+ for (unsigned i = 0; i < instr->operands.size(); i++) {
+ Operand& op = instr->operands[i];
- std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, target);
+ if (!op.isTemp() || !op.isFixed())
+ continue;
- tmp_file.clear(src, operand.regClass()); // TODO: try to avoid moving block vars to src
- tmp_file.block(operand.physReg(), operand.regClass());
+ PhysReg src = ctx.assignments[op.tempId()].reg;
- get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, instr, PhysRegInterval());
+ if (op.physReg() == src) { /* already in the fixed register: only reserve it in tmp_file */
+ tmp_file.block(op.physReg(), op.regClass());
+ continue;
}
- dst = operand.physReg();
- } else {
- /* clear the operand in case it's only a stride mismatch */
- register_file.clear(src, operand.regClass());
- dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
+ bool found = false;
+ u_foreach_bit64 (j, mask) {
+ if (instr->operands[j].tempId() == op.tempId() &&
+ instr->operands[j].physReg() == op.physReg()) {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ continue; /* an earlier operand with the same temp and target register already added this copy */
+
+ /* clear the operand's current location from tmp_file so fixed operands are not collected by collect_vars() */
+ tmp_file.clear(src, op.regClass()); // TODO: try to avoid moving blocking vars to src
+
+ mask |= (uint64_t)1 << i;
+
+ Operand pc_op(instr->operands[i].getTemp(), src);
+ Definition pc_def = Definition(op.physReg(), pc_op.regClass());
+ parallelcopy.emplace_back(pc_op, pc_def);
+ }
+
+ if (!mask)
+ return;
+
+ std::vector<unsigned> blocking_vars;
+ u_foreach_bit64 (i, mask) {
+ Operand& op = instr->operands[i];
+ PhysRegInterval target{op.physReg(), op.size()};
+ std::vector<unsigned> blocking_vars2 = collect_vars(ctx, tmp_file, target);
+ blocking_vars.insert(blocking_vars.end(), blocking_vars2.begin(), blocking_vars2.end());
+
+ /* prevent get_regs_for_copies() from using these registers */
+ tmp_file.block(op.physReg(), op.regClass());
}
+ get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, instr, PhysRegInterval());
+ update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops | fill_killed_ops);
+}
+
+void
+get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
+ std::vector<std::pair<Operand, Definition>>& parallelcopy,
+ aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
+{
+ /* clear the operand in case it's only a stride mismatch */
+ PhysReg src = ctx.assignments[operand.tempId()].reg;
+ register_file.clear(src, operand.regClass());
+ PhysReg dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
+
Operand pc_op = operand;
pc_op.setFixed(src);
Definition pc_def = Definition(dst, pc_op.regClass());
bool temp_in_scc = register_file[scc];
/* handle operands */
+ bool fixed = false;
for (unsigned i = 0; i < instr->operands.size(); ++i) {
auto& operand = instr->operands[i];
if (!operand.isTemp())
operand.setTemp(read_variable(ctx, operand.getTemp(), block.index));
assert(ctx.assignments[operand.tempId()].assigned);
+ fixed |=
+ operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg();
+ }
+
+ if (fixed)
+ handle_fixed_operands(ctx, register_file, parallelcopy, instr);
+
+ for (unsigned i = 0; i < instr->operands.size(); ++i) {
+ auto& operand = instr->operands[i];
+ if (!operand.isTemp() || operand.isFixed())
+ continue;
+
PhysReg reg = ctx.assignments[operand.tempId()].reg;
if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass()))
operand.setFixed(reg);
//! s2: %op1:s[2-3] = p_unit_test
Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
- //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
+ //! s2: %op0_2:s[2-3], s2: %op1_2:s[0-1] = p_parallelcopy %op0:s[0-1], %op1:s[2-3]
//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
if (!setup_cs("s2 s1", GFX10))
return;
- //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
+ //! s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp0:s[0-1]
//! p_unit_test %tmp1_2:s[1]
Operand op(inputs[1]);
op.setFixed(PhysReg(1));
if (!setup_cs("s2 s1 s1", GFX10))
return;
- //! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
+ //! s2: %tmp0_2:s[2-3], s1: %tmp2_2:s[0] = p_parallelcopy %tmp0:s[0-1], %tmp2:s[3]
//! p_unit_test %tmp0_2:s[2-3]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
if (!setup_cs("s2 s1 s1", GFX10))
return;
- //! s1: %tmp1_2:s[0], s1: %tmp2_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp2:s[3], %tmp0:s[0-1]
+ //! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[0], s1: %tmp2_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2], %tmp2:s[3]
//! p_unit_test %tmp0_2:s[2-3]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
if (!setup_cs("v1 v1", GFX10))
return;
- //! v1: %tmp0_2:v[1], v1: %tmp1_2:v[0] = p_parallelcopy %tmp0:v[0], %tmp1:v[1]
+ //! v1: %tmp1_2:v[0], v1: %tmp0_2:v[1] = p_parallelcopy %tmp1:v[1], %tmp0:v[0]
//! p_unit_test %tmp0_2:v[1], %tmp1_2:v[0]
bld.pseudo(aco_opcode::p_unit_test, inputs[0], Operand(inputs[1], PhysReg(256)));
finish_ra_test(ra_test_policy());
END_TEST
+BEGIN_TEST(regalloc.precolor.multiple_operands)
+   //>> v1: %tmp0:v[0], v1: %tmp1:v[1], v1: %tmp2:v[2], v1: %tmp3:v[3] = p_startpgm
+   if (!setup_cs("v1 v1 v1 v1", GFX10))
+      return;
+
+   /* Every input is precolored to a VGPR other than the one it starts in, so
+    * all four operands are expected to be moved by one parallelcopy. */
+   Operand fixed_v0(inputs[3], PhysReg(256 + 0));
+   Operand fixed_v1(inputs[0], PhysReg(256 + 1));
+   Operand fixed_v2(inputs[1], PhysReg(256 + 2));
+   Operand fixed_v3(inputs[2], PhysReg(256 + 3));
+
+   //! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
+   //! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
+   bld.pseudo(aco_opcode::p_unit_test, fixed_v0, fixed_v1, fixed_v2, fixed_v3);
+
+   finish_ra_test(ra_test_policy());
+END_TEST
+
+BEGIN_TEST(regalloc.precolor.different_regs)
+   //>> v1: %tmp0:v[0] = p_startpgm
+   if (!setup_cs("v1", GFX10))
+      return;
+
+   /* The same temporary is used three times, precolored to v0, v1 and v2.
+    * It already lives in v0, so that operand keeps %tmp0; the other two
+    * operands must each receive their own copy of it in v1 and v2. */
+   //! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
+   //! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
+   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
+              Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
+
+   finish_ra_test(ra_test_policy());
+END_TEST
+
BEGIN_TEST(regalloc.scratch_sgpr.create_vector)
if (!setup_cs("v1 s1", GFX7))
return;