}
}
+/**
+ * Try to optimize v_mad_f32 -> v_mac_f32 (and the analogous fma/f16/packed/
+ * legacy/dot4 opcodes listed below) by switching the instruction to
+ * Format::VOP2 in place.
+ *
+ * The instruction is left untouched unless every precondition checked below
+ * holds. On success only the format, the opcode and possibly the order of
+ * the first two operands are changed.
+ */
+void
+optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
+                       aco_ptr<Instruction>& instr)
+{
+   /* Pick the VOP2 replacement for this opcode, if the target provides one. */
+   aco_opcode vop2_opcode = instr->opcode;
+   bool has_vop2_form = false;
+   switch (instr->opcode) {
+   case aco_opcode::v_mad_f32:
+      vop2_opcode = aco_opcode::v_mac_f32;
+      has_vop2_form = true;
+      break;
+   case aco_opcode::v_mad_f16:
+   case aco_opcode::v_mad_legacy_f16:
+      vop2_opcode = aco_opcode::v_mac_f16;
+      has_vop2_form = true;
+      break;
+   case aco_opcode::v_fma_f32:
+      vop2_opcode = aco_opcode::v_fmac_f32;
+      has_vop2_form = program->gfx_level >= GFX10;
+      break;
+   case aco_opcode::v_fma_f16:
+      vop2_opcode = aco_opcode::v_fmac_f16;
+      has_vop2_form = program->gfx_level >= GFX10;
+      break;
+   case aco_opcode::v_pk_fma_f16:
+      vop2_opcode = aco_opcode::v_pk_fmac_f16;
+      has_vop2_form = program->gfx_level >= GFX10;
+      break;
+   case aco_opcode::v_mad_legacy_f32:
+      vop2_opcode = aco_opcode::v_mac_legacy_f32;
+      has_vop2_form = program->dev.has_mac_legacy32;
+      break;
+   case aco_opcode::v_fma_legacy_f32:
+      vop2_opcode = aco_opcode::v_fmac_legacy_f32;
+      has_vop2_form = program->dev.has_mac_legacy32;
+      break;
+   case aco_opcode::v_dot4_i32_i8:
+      vop2_opcode = aco_opcode::v_dot4c_i32_i8;
+      has_vop2_form = program->family != CHIP_VEGA20;
+      break;
+   default: break;
+   }
+   if (!has_vop2_form)
+      return;
+
+   const auto is_vgpr_temp = [](const auto& op)
+   { return op.isTemp() && op.getTemp().type() == RegType::vgpr; };
+
+   /* The third operand has to be a VGPR temporary that is killed before the definition. */
+   auto& acc = instr->operands[2];
+   if (!acc.isTemp() || !acc.isKillBeforeDef() || acc.getTemp().type() != RegType::vgpr)
+      return;
+
+   /* At least one of the first two operands has to be a VGPR temporary. */
+   if (!is_vgpr_temp(instr->operands[0]) && !is_vgpr_temp(instr->operands[1]))
+      return;
+
+   if (instr->usesModifiers())
+      return;
+
+   /* None of the three operands may start at a non-zero byte of its register. */
+   for (unsigned idx = 0; idx < 3; idx++) {
+      if (instr->operands[idx].physReg().byte() != 0)
+         return;
+   }
+
+   /* Make sure the second operand is a VGPR temporary; given the check above,
+    * the first operand is one whenever the second is not.
+    */
+   if (!is_vgpr_temp(instr->operands[1]))
+      std::swap(instr->operands[0], instr->operands[1]);
+
+   /* Keep the current encoding when the definition has an affinity that is
+    * already assigned to a register other than the third operand's and
+    * register_file.test() returns false for that register.
+    * NOTE(review): presumably test() reports whether the range is occupied, so
+    * this bails out while the affinity register is still available - confirm
+    * against RegisterFile.
+    */
+   const auto affinity_id = ctx.assignments[instr->definitions[0].tempId()].affinity;
+   if (affinity_id) {
+      assignment& preferred = ctx.assignments[affinity_id];
+      if (preferred.assigned && preferred.reg != acc.physReg() &&
+          !register_file.test(preferred.reg, acc.bytes()))
+         return;
+   }
+
+   /* The instruction object is re-tagged in place rather than re-created. */
+   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
+                 "Invalid direct instruction cast.");
+   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
+                 "Invalid direct instruction cast.");
+   instr->format = Format::VOP2;
+   instr->opcode = vop2_opcode;
+}
+
+/**
+ * Entry point for per-instruction encoding optimizations.
+ *
+ * Forwards VALU instructions to optimize_encoding_vop2(); any other
+ * instruction is left untouched.
+ */
+void
+optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
+                  aco_ptr<Instruction>& instr)
+{
+   if (!instr->isVALU())
+      return;
+
+   optimize_encoding_vop2(program, ctx, register_file, instr);
+}
+
} /* end namespace */
void
register_file.clear(op);
}
- /* try to optimize v_mad_f32 -> v_mac_f32 */
- if ((instr->opcode == aco_opcode::v_mad_f32 ||
- (instr->opcode == aco_opcode::v_fma_f32 && program->gfx_level >= GFX10) ||
- instr->opcode == aco_opcode::v_mad_f16 ||
- instr->opcode == aco_opcode::v_mad_legacy_f16 ||
- (instr->opcode == aco_opcode::v_fma_f16 && program->gfx_level >= GFX10) ||
- (instr->opcode == aco_opcode::v_pk_fma_f16 && program->gfx_level >= GFX10) ||
- (instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
- (instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
- (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
- instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
- instr->operands[2].getTemp().type() == RegType::vgpr &&
- ((instr->operands[0].isTemp() &&
- instr->operands[0].getTemp().type() == RegType::vgpr) ||
- (instr->operands[1].isTemp() &&
- instr->operands[1].getTemp().type() == RegType::vgpr)) &&
- !instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 &&
- instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) {
- if (!instr->operands[1].isTemp() ||
- instr->operands[1].getTemp().type() != RegType::vgpr)
- std::swap(instr->operands[0], instr->operands[1]);
-
- unsigned def_id = instr->definitions[0].tempId();
- bool use_vop2 = true;
- if (ctx.assignments[def_id].affinity) {
- assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
- if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
- !register_file.test(affinity.reg, instr->operands[2].bytes()))
- use_vop2 = false;
- }
- if (use_vop2) {
- static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
- "Invalid direct instruction cast.");
- static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
- "Invalid direct instruction cast.");
- instr->format = Format::VOP2;
- switch (instr->opcode) {
- case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
- case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
- case aco_opcode::v_mad_f16:
- case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
- case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
- case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
- case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
- case aco_opcode::v_mad_legacy_f32:
- instr->opcode = aco_opcode::v_mac_legacy_f32;
- break;
- case aco_opcode::v_fma_legacy_f32:
- instr->opcode = aco_opcode::v_fmac_legacy_f32;
- break;
- default: break;
- }
- }
- }
+ optimize_encoding(program, ctx, register_file, instr);
/* Handle definitions which must have the same register as an operand.
* We expect that the definition has the same size as the operand, otherwise the new