[AMDGPU] Only enable mad/mac legacy f32 patterns if denormals may be flushed

author Jay Foad <jay.foad@amd.com>

Fri, 9 Oct 2020 12:28:19 +0000 (13:28 +0100)

committer Jay Foad <jay.foad@amd.com>

Fri, 9 Oct 2020 16:08:38 +0000 (17:08 +0100)
author Jay Foad <jay.foad@amd.com>
Fri, 9 Oct 2020 12:28:19 +0000 (13:28 +0100)
committer Jay Foad <jay.foad@amd.com>
Fri, 9 Oct 2020 16:08:38 +0000 (17:08 +0100)
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td

index 068d8dc..621c339 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -881,10 +881,12 @@ let AddedComplexity = 9 in {
  let OtherPredicates = [HasMadMacF32Insts] in {
  def : FMADPat <f32, V_MAC_F32_e64, fmad>;
  def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
+} // OtherPredicates = [HasMadMacF32Insts]
  
  // Don't allow source modifiers. If there are any source modifiers then it's
  // better to select mad instead of mac.
-let SubtargetPredicate = isGFX6GFX7GFX10 in
+let SubtargetPredicate = isGFX6GFX7GFX10,
+    OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
  def : GCNPat <
        (f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0),
                                      (VOP3NoMods f32:$src1)),
@@ -892,7 +894,6 @@ def : GCNPat <
        (V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
                              SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
  >;
-} // OtherPredicates = [HasMadMacF32Insts]
  
  let SubtargetPredicate = Has16BitInsts in {
  def : FMADPat <f16, V_MAC_F16_e64, fmad>;
@@ -909,9 +910,10 @@ class FMADModsPat<ValueType Ty, Instruction inst, SDPatternOperator mad_opr>
    $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
  >;
  
-let OtherPredicates = [HasMadMacF32Insts] in {
+let OtherPredicates = [HasMadMacF32Insts] in
  def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
  
+let OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
  def : GCNPat <
        (f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
                                      (VOP3Mods f32:$src1, i32:$src1_mod)),
@@ -919,7 +921,6 @@ def : GCNPat <
        (V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1,
                          $src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
  >;
-} // OtherPredicates = [HasMadMacF32Insts]
  
  let SubtargetPredicate = Has16BitInsts in
  def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll

index ebe3ffd..9b6370d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -237,7 +237,52 @@ define float @v_mul_legacy_fneg_f32(float %a, float %b) {
    ret float %result
  }
  
-define float @v_mad_legacy_f32(float %a, float %b, float %c) {
+; Don't form mad/mac instructions because they don't support denormals.
+define float @v_add_mul_legacy_f32(float %a, float %b, float %c) {
+; GFX6-LABEL: v_add_mul_legacy_f32:
+; GFX6:       ; %bb.0:
+; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX6-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_mul_legacy_f32:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_mul_legacy_f32:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_add_mul_legacy_f32:
+; GFX101:       ; %bb.0:
+; GFX101-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX101-NEXT:    ; implicit-def: $vcc_hi
+; GFX101-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX101-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_add_mul_legacy_f32:
+; GFX103:       ; %bb.0:
+; GFX103-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT:    ; implicit-def: $vcc_hi
+; GFX103-NEXT:    v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT:    s_setpc_b64 s[30:31]
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+  %add = fadd float %mul, %c
+  ret float %add
+}
+
+define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
  ; GFX6-LABEL: v_mad_legacy_f32:
  ; GFX6:       ; %bb.0:
  ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -279,7 +324,7 @@ define float @v_mad_legacy_f32(float %a, float %b, float %c) {
    ret float %add
  }
  
-define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) {
+define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) #2 {
  ; GFX6-LABEL: v_mad_legacy_fneg_f32:
  ; GFX6:       ; %bb.0:
  ; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -512,3 +557,4 @@ declare float @llvm.amdgcn.fmul.legacy(float, float) #1
  
  attributes #0 = { nounwind readnone speculatable willreturn }
  attributes #1 = { nounwind readnone speculatable }
+attributes #2 = { "denormal-fp-math-f32"="preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll

index a91745b..53809d3 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -38,6 +38,17 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
    ret void
  }
  
+; Don't form mad/mac instructions because they don't support denormals.
+; GCN-LABEL: {{^}}test_add_mul_legacy_f32:
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
+; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
+define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+  %add = fadd float %mul, %c
+  store float %add, float addrspace(1)* %out, align 4
+  ret void
+}
+
  ; GCN-LABEL: {{^}}test_mad_legacy_f32:
  ; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
  ; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
@@ -45,7 +56,7 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
  ; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
  ; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
  ; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 {
    %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
    %add = fadd float %mul, %c
    store float %add, float addrspace(1)* %out, align 4
@@ -56,7 +67,7 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %
  ; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}}
  ; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
  ; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 {
    %a.fneg = fneg float %a
    %b.fneg = fneg float %b
    %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
@@ -70,3 +81,4 @@ declare float @llvm.amdgcn.fmul.legacy(float, float) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" }
author	Jay Foad <jay.foad@amd.com>
	Fri, 9 Oct 2020 12:28:19 +0000 (13:28 +0100)
committer	Jay Foad <jay.foad@amd.com>
	Fri, 9 Oct 2020 16:08:38 +0000 (17:08 +0100)
llvm/lib/Target/AMDGPU/SIInstructions.td		patch | blob | history
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll		patch | blob | history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll		patch | blob | history