return true;
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- auto F = CFP->getValueAPF();
+ const auto &F = CFP->getValueAPF();
if (F.isNaN() && F.isSignaling())
return false;
- return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType());
+ if (!F.isDenormal())
+ return true;
+
+ DenormalMode Mode =
+ DAG.getMachineFunction().getDenormalMode(F.getSemantics());
+ return Mode == DenormalMode::getIEEE();
}
// If source is a result of another standard FP operation it is already in
// snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
+ // FIXME: denormalsEnabledForType is broken for dynamic denormal mode
denormalsEnabledForType(DAG, Op.getValueType()))
return true;
[[fallthrough]];
}
default:
+ // FIXME: denormalsEnabledForType is broken for dynamic denormal mode
return denormalsEnabledForType(DAG, Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
}
if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {
if (FCR->Value.isSignaling())
return false;
- return !FCR->Value.isDenormal() ||
- denormalsEnabledForType(MRI.getType(FCR->VReg), MF);
+ if (!FCR->Value.isDenormal())
+ return true;
+
+ DenormalMode Mode = MF.getDenormalMode(FCR->Value.getSemantics());
+ return Mode == DenormalMode::getIEEE();
}
if (MaxDepth == 0)
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE: {
if (Subtarget->supportsMinMaxDenormModes() ||
+ // FIXME: denormalsEnabledForType is broken for dynamic denormal mode
denormalsEnabledForType(MRI.getType(Reg), MF))
return true;
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
- if (C.isDenormal() && !denormalsEnabledForType(DAG, VT)) {
- return DAG.getConstantFP(APFloat::getZero(C.getSemantics(),
- C.isNegative()), SL, VT);
+ if (C.isDenormal()) {
+ DenormalMode Mode =
+ DAG.getMachineFunction().getDenormalMode(C.getSemantics());
+ if (Mode == DenormalMode::getPreserveSign()) {
+ return DAG.getConstantFP(
+ APFloat::getZero(C.getSemantics(), C.isNegative()), SL, VT);
+ }
+
+ if (Mode != DenormalMode::getIEEE())
+ return SDValue();
}
if (C.isNaN()) {
ret void
}
+; Canonicalize the f32 constant with bit pattern 0x7fffff (a denormal) in a
+; kernel using attribute group #5 ("denormal-fp-math-f32"="dynamic,dynamic").
+; The checks expect the canonicalize to stay an instruction (v_mul_f32 by 1.0
+; on GFX678, v_max_f32 of the value with itself on GFX9 and GFX11) instead of
+; being folded to a constant.
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic(ptr addrspace(1) %out) #5 {
+; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
+; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: v_mov_b32_e32 v1, s1
+; GFX678-NEXT: flat_store_dword v[0:1], v2
+; GFX678-NEXT: s_endpgm
+;
+; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
+ store float %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
+; Same denormal f32 constant (bit pattern 0x7fffff) as the plain _dynamic test,
+; but using attribute group #6 ("denormal-fp-math-f32"="dynamic,ieee"), i.e.
+; only the first field of the mode pair is dynamic.
+; NOTE(review): per the _out suffix the first field is presumably the output
+; mode -- confirm against the LangRef description of the attribute.
+; The checks expect the canonicalize to stay an instruction (v_mul_f32 by 1.0
+; on GFX678, v_max_f32 on GFX9 and GFX11) instead of being folded.
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out(ptr addrspace(1) %out) #6 {
+; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
+; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: v_mov_b32_e32 v1, s1
+; GFX678-NEXT: flat_store_dword v[0:1], v2
+; GFX678-NEXT: s_endpgm
+;
+; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_out:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
+ store float %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
+; Same denormal f32 constant (bit pattern 0x7fffff) as the plain _dynamic test,
+; but using attribute group #7 ("denormal-fp-math-f32"="ieee,dynamic"), i.e.
+; only the second field of the mode pair is dynamic.
+; NOTE(review): per the _in suffix the second field is presumably the input
+; mode -- confirm against the LangRef description of the attribute.
+; The checks expect the canonicalize to stay an instruction (v_mul_f32 by 1.0
+; on GFX678, v_max_f32 on GFX9 and GFX11) instead of being folded.
+define amdgpu_kernel void @test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in(ptr addrspace(1) %out) #7 {
+; GFX678-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
+; GFX678: ; %bb.0:
+; GFX678-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX678-NEXT: s_mov_b32 s2, 0x7fffff
+; GFX678-NEXT: v_mul_f32_e64 v2, 1.0, s2
+; GFX678-NEXT: s_waitcnt lgkmcnt(0)
+; GFX678-NEXT: v_mov_b32_e32 v0, s0
+; GFX678-NEXT: v_mov_b32_e32 v1, s1
+; GFX678-NEXT: flat_store_dword v[0:1], v2
+; GFX678-NEXT: s_endpgm
+;
+; GFX9-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; GFX9-NEXT: s_mov_b32 s2, 0x7fffff
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: v_max_f32_e64 v1, s2, s2
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX11-LABEL: test_no_denormals_fold_canonicalize_denormal0_f32_dynamic_in:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: v_max_f32_e64 v1, 0x7fffff, 0x7fffff
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
+ %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 8388607 to float))
+ store float %canonicalized, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @test_denormals_fold_canonicalize_denormal0_f32(ptr addrspace(1) %out) #3 {
; GFX678-LABEL: test_denormals_fold_canonicalize_denormal0_f32:
; GFX678: ; %bb.0:
attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
attributes #3 = { nounwind "denormal-fp-math"="ieee,ieee" }
attributes #4 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #5 = { nounwind "denormal-fp-math-f32"="dynamic,dynamic" }
+attributes #6 = { nounwind "denormal-fp-math-f32"="dynamic,ieee" }
+attributes #7 = { nounwind "denormal-fp-math-f32"="ieee,dynamic" }