aco: create v_mac_legacy_f32/v_fmac_legacy_f32
author: Rhys Perry <pendingchaos02@gmail.com>
Tue, 27 Apr 2021 11:11:37 +0000 (12:11 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Thu, 20 Jan 2022 22:54:42 +0000 (22:54 +0000)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13436>

src/amd/compiler/aco_ir.cpp
src/amd/compiler/aco_ir.h
src/amd/compiler/aco_opcodes.py
src/amd/compiler/aco_register_allocation.cpp

index 523b25e..1b624e4 100644 (file)
@@ -148,6 +148,7 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info,
    if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
        program->family == CHIP_HAWAII)
       program->dev.has_fast_fma32 = true;
+   program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
 
    program->wgp_mode = wgp_mode;
 
index 39b2e3a..4a44448 100644 (file)
@@ -2047,6 +2047,7 @@ struct DeviceInfo {
    unsigned max_wave64_per_simd;
    unsigned simd_per_cu;
    bool has_fast_fma32 = false;
+   bool has_mac_legacy32 = false;
    bool xnack_enabled = false;
    bool sram_ecc_enabled = false;
 };
index 16494a7..d95f37a 100644 (file)
@@ -674,7 +674,8 @@ VOP2 = {
    (0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
    (0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
    (0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
-   (0x06, 0x06,   -1,   -1, 0x06, "v_mac_legacy_f32", True),
+   (0x06, 0x06,   -1,   -1, 0x06, "v_mac_legacy_f32", True), #GFX6,7,10
+   (  -1,   -1,   -1,   -1, 0x06, "v_fmac_legacy_f32", True), #GFX10.3+
    (0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
    (0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
    (0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
@@ -1686,6 +1687,9 @@ for ver in ['gfx9', 'gfx10']:
             # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
             if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']):
                 continue
+            # v_mac_legacy_f32 is replaced with v_fmac_legacy_f32 on GFX10.3
+            if ver == 'gfx10' and names == set(['v_mac_legacy_f32', 'v_fmac_legacy_f32']):
+                continue
 
             print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
             sys.exit(1)
index 088afaa..ab10b2f 100644 (file)
@@ -2383,6 +2383,13 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
                   op = instr->operands[2];
                   break;
 
+               case aco_opcode::v_mad_legacy_f32:
+               case aco_opcode::v_fma_legacy_f32:
+                  if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32)
+                     continue;
+                  op = instr->operands[2];
+                  break;
+
                default: continue;
                }
 
@@ -2577,6 +2584,8 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
               instr->opcode == aco_opcode::v_mad_legacy_f16 ||
               (instr->opcode == aco_opcode::v_fma_f16 && program->chip_class >= GFX10) ||
               (instr->opcode == aco_opcode::v_pk_fma_f16 && program->chip_class >= GFX10) ||
+              (instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
+              (instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
               (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
              instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
              instr->operands[2].getTemp().type() == RegType::vgpr &&
@@ -2608,6 +2617,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
                case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
                case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
                case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
+               case aco_opcode::v_mad_legacy_f32:
+                  instr->opcode = aco_opcode::v_mac_legacy_f32;
+                  break;
+               case aco_opcode::v_fma_legacy_f32:
+                  instr->opcode = aco_opcode::v_fmac_legacy_f32;
+                  break;
                default: break;
                }
             }
@@ -2617,6 +2632,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
          if (instr->opcode == aco_opcode::v_interp_p2_f32 ||
              instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 ||
              instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 ||
+             instr->opcode == aco_opcode::v_fmac_legacy_f32 ||
              instr->opcode == aco_opcode::v_pk_fmac_f16 ||
              instr->opcode == aco_opcode::v_writelane_b32 ||
              instr->opcode == aco_opcode::v_writelane_b32_e64 ||