+// f32 fmad / AMDGPUfmad_ftz patterns targeting V_MAC_F32_e64 (FMADPat is defined
+// elsewhere), gated only on HasMadMacF32Insts. The scope is closed right after
+// these two defs so that the legacy-multiply pattern that follows can carry its
+// own, stricter predicate list.
let OtherPredicates = [HasMadMacF32Insts] in {
def : FMADPat <f32, V_MAC_F32_e64, fmad>;
def : FMADPat <f32, V_MAC_F32_e64, AMDGPUfmad_ftz>;
+} // OtherPredicates = [HasMadMacF32Insts]
// Don't allow source modifiers. If there are any source modifiers then it's
// better to select mad instead of mac.
-let SubtargetPredicate = isGFX6GFX7GFX10 in
+let SubtargetPredicate = isGFX6GFX7GFX10,
+ OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
def : GCNPat <
(f32 (fadd (AMDGPUfmul_legacy (VOP3NoMods f32:$src0),
(VOP3NoMods f32:$src1)),
(V_MAC_LEGACY_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-} // OtherPredicates = [HasMadMacF32Insts]
let SubtargetPredicate = Has16BitInsts in {
def : FMADPat <f16, V_MAC_F16_e64, fmad>;
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-let OtherPredicates = [HasMadMacF32Insts] in {
+let OtherPredicates = [HasMadMacF32Insts] in
def : FMADModsPat<f32, V_MAD_F32, AMDGPUfmad_ftz>;
+let OtherPredicates = [HasMadMacF32Insts, NoFP32Denormals] in
def : GCNPat <
(f32 (fadd (AMDGPUfmul_legacy (VOP3Mods f32:$src0, i32:$src0_mod),
(VOP3Mods f32:$src1, i32:$src1_mod)),
(V_MAD_LEGACY_F32 $src0_mod, $src0, $src1_mod, $src1,
$src2_mod, $src2, DSTCLAMP.NONE, DSTOMOD.NONE)
>;
-} // OtherPredicates = [HasMadMacF32Insts]
let SubtargetPredicate = Has16BitInsts in
def : FMADModsPat<f16, V_MAD_F16, AMDGPUfmad_ftz>;
ret float %result
}
-define float @v_mad_legacy_f32(float %a, float %b, float %c) {
+; This function carries no "denormal-fp-math-f32" attribute, so f32 denormals
+; are presumably left enabled (the default). Don't form mad/mac instructions
+; because they don't support denormals: every target below is expected to keep
+; a separate legacy multiply followed by a plain add.
+define float @v_add_mul_legacy_f32(float %a, float %b, float %c) {
+; GFX6-LABEL: v_add_mul_legacy_f32:
+; GFX6: ; %bb.0:
+; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX6-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX6-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_mul_legacy_f32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX8-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_mul_legacy_f32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX9-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX101-LABEL: v_add_mul_legacy_f32:
+; GFX101: ; %bb.0:
+; GFX101-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX101-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX101-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX101-NEXT: ; implicit-def: $vcc_hi
+; GFX101-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX101-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX103-LABEL: v_add_mul_legacy_f32:
+; GFX103: ; %bb.0:
+; GFX103-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX103-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX103-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
+; GFX103-NEXT: ; implicit-def: $vcc_hi
+; GFX103-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX103-NEXT: s_setpc_b64 s[30:31]
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+ %add = fadd float %mul, %c
+ ret float %add
+}
+
+define float @v_mad_legacy_f32(float %a, float %b, float %c) #2 {
; GFX6-LABEL: v_mad_legacy_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
ret float %add
}
-define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) {
+define float @v_mad_legacy_fneg_f32(float %a, float %b, float %c) #2 {
; GFX6-LABEL: v_mad_legacy_fneg_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
attributes #0 = { nounwind readnone speculatable willreturn }
attributes #1 = { nounwind readnone speculatable }
+; #2 requests preserve-sign handling of f32 denormals; the v_mad_legacy_*
+; functions in this file are tagged with it.
+attributes #2 = { "denormal-fp-math-f32"="preserve-sign" }
ret void
}
+; This kernel uses attribute group #0, which sets no "denormal-fp-math" mode.
+; Don't form mad/mac instructions because they don't support denormals: the
+; legacy multiply and the add are expected to stay separate instructions.
+; GCN-LABEL: {{^}}test_add_mul_legacy_f32:
+; GCN: v_mul_legacy_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
+; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
+define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+ %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
+ %add = fadd float %mul, %c
+ store float %add, float addrspace(1)* %out, align 4
+ ret void
+}
+
; GCN-LABEL: {{^}}test_mad_legacy_f32:
; GFX6: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX8: v_mad_legacy_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX101: v_mac_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 {
%mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
%add = fadd float %mul, %c
store float %add, float addrspace(1)* %out, align 4
; MADMACF32: v_mad_legacy_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -{{[sv][0-9]+}}, v{{[0-9]+}}
; NOMADMACF32: v_mul_legacy_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 {
+define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 {
%a.fneg = fneg float %a
%b.fneg = fneg float %b
%mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
+; #2 requests preserve-sign denormal handling; the test_mad_legacy_* kernels
+; use it so that v_mad_legacy_f32 / v_mac_legacy_f32 can still be selected.
+; NOTE(review): "denormal-fp-math" is the non-type-specific form, whereas the
+; mad/mac patterns are gated on NoFP32Denormals only -- confirm whether
+; "denormal-fp-math-f32" would be the tighter choice here.
+attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" }