We check unsafe-fp-math for sqrt but not for fpow, which is inconsistent.
Since the direction is to remove this global option, remove the unsafe-fp-math
check for sqrt and update the tests with the afn fast-math flag.
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D93891
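As a rough sketch of the user-facing difference (hypothetical IR, not taken
from this patch): previously -enable-unsafe-fp-math alone was enough to unlock
the approximate sqrt expansion; now the call itself must carry the afn flag:

  %approx = call afn ninf float @llvm.sqrt.f32(float %x) ; eligible for the estimate path
  %exact  = call float @llvm.sqrt.f32(float %x)          ; always lowered precisely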
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
- if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
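  // Illustration only (hypothetical IR, not part of this patch): with the new
  // gating, the estimate fires only when the call carries both flags, e.g.
  //   %r1 = call afn ninf float @llvm.sqrt.f32(float %x) ; may use the rsqrt estimate
  //   %r2 = call afn float @llvm.sqrt.f32(float %x)      ; rejected here, unless
  //                                                      ; -enable-no-infs-fp-math is set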
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool AllowInaccurateRcp = DAG.getTarget().Options.UnsafeFPMath ||
- Flags.hasApproximateFuncs();
+ bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
// Without !fpmath accuracy information, we can't do more because we don't
// know exactly whether rcp is accurate enough to meet the !fpmath requirement.
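// For example (hypothetical operands), IR supplies that accuracy bound via the
// standard !fpmath metadata, which states the maximum tolerable error in ULPs:
//   %q = fdiv float %a, %b, !fpmath !0
//   !0 = !{float 2.500000e+00} ; up to 2.5 ULPs of error is acceptable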
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
.entry:
- %tmp7 = fdiv float 1.000000e+00, %tmp6
+ %tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
%.i188 = fadd float %tmp9, 0.000000e+00
%gep2 = getelementptr half, half addrspace(1)* %in2, i32 4
%r0 = load half, half addrspace(1)* %in1, align 4
%r1 = load half, half addrspace(1)* %gep2, align 4
- %r2 = frem half %r0, %r1
+ %r2 = frem afn half %r0, %r1
store half %r2, half addrspace(1)* %out, align 4
ret void
}
%gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
%r0 = load float, float addrspace(1)* %in1, align 4
%r1 = load float, float addrspace(1)* %gep2, align 4
- %r2 = frem float %r0, %r1
+ %r2 = frem afn float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
}
double addrspace(1)* %in2) #1 {
%r0 = load double, double addrspace(1)* %in1, align 8
%r1 = load double, double addrspace(1)* %in2, align 8
- %r2 = frem double %r0, %r1
+ %r2 = frem afn double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
}
; CHECK: sqrt.approx.f32
; CHECK: div.approx.f32
define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
- %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+ %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
%t2 = fdiv float %t1, %b
ret float %t2
}
; CHECK: sqrt.approx.ftz.f32
; CHECK: div.approx.ftz.f32
define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
- %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+ %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
%t2 = fdiv float %t1, %b
ret float %t2
}
; CHECK: rcp.approx.ftz.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
- %t1 = tail call ninf double @llvm.sqrt.f64(double %a)
+ %t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
%t2 = fdiv double %t1, %b
ret double %t2
}
; CHECK-LABEL: test_sqrt32_ninf
define float @test_sqrt32_ninf(float %a) #0 {
; CHECK: sqrt.approx.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; CHECK-LABEL: test_sqrt_ftz_ninf
define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
; CHECK: sqrt.approx.ftz.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; There's no sqrt.approx.f64 instruction; we emit reciprocal(rsqrt(x)).
; There's no non-ftz approximate reciprocal, so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK-LABEL: test_sqrt32_refined_ninf
define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; CHECK-LABEL: test_sqrt64_refined_ninf
define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK-LABEL: test_sqrt32_refined_ftz_ninf
define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; CHECK-LABEL: test_sqrt64_refined_ftz_ninf
define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
; CHECK: rsqrt.approx.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
- ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
- ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool)
; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY5]]
; CHECK: RET 0, $xmm0
- %call = tail call ninf float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf afn float @llvm.sqrt.f32(float %f)
ret float %call
}
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
- ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
- ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
; CHECK: %15:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY3]]
; CHECK: RET 0, $xmm0
- %call = tail call ninf float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf afn float @llvm.sqrt.f32(float %f)
ret float %call
}
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf float @__sqrtf_finite(float %f) #2
+ %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
ret float %call
}
; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf float @__sqrtf_finite(float %f) #2
+ %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
ret float %call
}
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf float @__sqrtf_finite(float %x) #2
+ %call = tail call ninf afn float @__sqrtf_finite(float %x) #2
ret float %call
}
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ %call = tail call ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
ret <4 x float> %call
}