aco: don't create v_mad_f32 on GFX10.3
authorRhys Perry <pendingchaos02@gmail.com>
Tue, 16 Jun 2020 16:43:01 +0000 (17:43 +0100)
committerRhys Perry <pendingchaos02@gmail.com>
Tue, 4 Aug 2020 19:39:33 +0000 (20:39 +0100)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5546>

src/amd/compiler/aco_optimizer.cpp

index c1f0bc2..d060f37 100644 (file)
@@ -2614,7 +2614,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
                 instr->opcode == aco_opcode::v_sub_f16 ||
                 instr->opcode == aco_opcode::v_subrev_f16;
    if (mad16 || mad32) {
-      bool need_fma = mad32 ? block.fp_mode.denorm32 != 0 :
+      bool need_fma = mad32 ? (block.fp_mode.denorm32 != 0 || ctx.program->chip_class >= GFX10_3) :
                               (block.fp_mode.denorm16_64 != 0 || ctx.program->chip_class >= GFX10);
       if (need_fma && instr->definitions[0].isPrecise())
          return;