We check unsafe-fp-math for sqrt but not for fpow, which is inconsistent.
Since the direction is to remove this global option, remove the unsafe-fp-math
check for sqrt and update the tests with the afn fast-math flag.
Reviewed By: spatel
Differential Revision: https://reviews.llvm.org/D93891
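As a rough sketch of the user-facing difference (hypothetical IR, not taken
from this patch): previously -enable-unsafe-fp-math alone was enough to unlock
the approximate sqrt expansion; now the call itself must carry the afn flag:

  %approx = call afn ninf float @llvm.sqrt.f32(float %x) ; eligible for the estimate path
  %exact  = call float @llvm.sqrt.f32(float %x)          ; always lowered precisely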
// Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
// sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
- if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) ||
+ if (!Flags.hasApproximateFuncs() ||
(!Options.NoInfsFPMath && !Flags.hasNoInfs()))
return SDValue();
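  // Illustration only (hypothetical IR, not part of this patch): with the new
  // gating, the estimate fires only when the call carries both flags, e.g.
  //   %r1 = call afn ninf float @llvm.sqrt.f32(float %x) ; may use the rsqrt estimate
  //   %r2 = call afn float @llvm.sqrt.f32(float %x)      ; rejected here, unless
  //                                                      ; -enable-no-infs-fp-math is set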
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool AllowInaccurateRcp = DAG.getTarget().Options.UnsafeFPMath ||
- Flags.hasApproximateFuncs();
+ bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
// Without !fpmath accuracy information, we can't do more because we don't
// know exactly whether rcp is accurate enough to meet the !fpmath requirement.
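// For example (hypothetical operands), IR supplies that accuracy bound via the
// standard !fpmath metadata, which states the maximum tolerable error in ULPs:
//   %q = fdiv float %a, %b, !fpmath !0
//   !0 = !{float 2.500000e+00} ; up to 2.5 ULPs of error is acceptable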
; GCN-NSZ-DAG: v_cmp_nlt_f32_e64 {{.*}}, -[[D]]
define amdgpu_ps float @fneg_fadd_0_nsz(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #2 {
.entry:
- %tmp7 = fdiv float 1.000000e+00, %tmp6
+ %tmp7 = fdiv afn float 1.000000e+00, %tmp6
%tmp8 = fmul float 0.000000e+00, %tmp7
%tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
%.i188 = fadd float %tmp9, 0.000000e+00
%gep2 = getelementptr half, half addrspace(1)* %in2, i32 4
%r0 = load half, half addrspace(1)* %in1, align 4
%r1 = load half, half addrspace(1)* %gep2, align 4
- %r2 = frem half %r0, %r1
+ %r2 = frem afn half %r0, %r1
store half %r2, half addrspace(1)* %out, align 4
ret void
}
%gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
%r0 = load float, float addrspace(1)* %in1, align 4
%r1 = load float, float addrspace(1)* %gep2, align 4
- %r2 = frem float %r0, %r1
+ %r2 = frem afn float %r0, %r1
store float %r2, float addrspace(1)* %out, align 4
ret void
}
double addrspace(1)* %in2) #1 {
%r0 = load double, double addrspace(1)* %in1, align 8
%r1 = load double, double addrspace(1)* %in2, align 8
- %r2 = frem double %r0, %r1
+ %r2 = frem afn double %r0, %r1
store double %r2, double addrspace(1)* %out, align 8
ret void
}
; CHECK: sqrt.approx.f32
; CHECK: div.approx.f32
define float @sqrt_div_fast_ninf(float %a, float %b) #0 {
- %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+ %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
%t2 = fdiv float %t1, %b
ret float %t2
}
; CHECK: sqrt.approx.ftz.f32
; CHECK: div.approx.ftz.f32
define float @sqrt_div_fast_ftz_ninf(float %a, float %b) #0 #1 {
- %t1 = tail call ninf float @llvm.sqrt.f32(float %a)
+ %t1 = tail call ninf afn float @llvm.sqrt.f32(float %a)
%t2 = fdiv float %t1, %b
ret float %t2
}
; CHECK: rcp.approx.ftz.f64
; CHECK: div.rn.f64
define double @sqrt_div_fast_ftz_f64_ninf(double %a, double %b) #0 #1 {
- %t1 = tail call ninf double @llvm.sqrt.f64(double %a)
+ %t1 = tail call ninf afn double @llvm.sqrt.f64(double %a)
%t2 = fdiv double %t1, %b
ret double %t2
}
; CHECK-LABEL: test_sqrt32_ninf
define float @test_sqrt32_ninf(float %a) #0 {
; CHECK: sqrt.approx.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; CHECK-LABEL: test_sqrt_ftz_ninf
define float @test_sqrt_ftz_ninf(float %a) #0 #1 {
; CHECK: sqrt.approx.ftz.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; There's no sqrt.approx.f64 instruction; we emit reciprocal(rsqrt(x)).
; There's no non-ftz approximate reciprocal, so we just use the ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; There's no sqrt.approx.ftz.f64 instruction; we just use the non-ftz version.
; CHECK: rsqrt.approx.f64
; CHECK: rcp.approx.ftz.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK-LABEL: test_sqrt32_refined_ninf
define float @test_sqrt32_refined_ninf(float %a) #0 #2 {
; CHECK: rsqrt.approx.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; CHECK-LABEL: test_sqrt64_refined_ninf
define double @test_sqrt64_refined_ninf(double %a) #0 #2 {
; CHECK: rsqrt.approx.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK-LABEL: test_sqrt32_refined_ftz_ninf
define float @test_sqrt32_refined_ftz_ninf(float %a) #0 #1 #2 {
; CHECK: rsqrt.approx.ftz.f32
- %ret = tail call ninf float @llvm.sqrt.f32(float %a)
+ %ret = tail call ninf afn float @llvm.sqrt.f32(float %a)
ret float %ret
}
; CHECK-LABEL: test_sqrt64_refined_ftz_ninf
define double @test_sqrt64_refined_ftz_ninf(double %a) #0 #1 #2 {
; CHECK: rsqrt.approx.f64
- %ret = tail call ninf double @llvm.sqrt.f64(double %a)
+ %ret = tail call ninf afn double @llvm.sqrt.f64(double %a)
ret double %ret
}
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
- ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
- ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]]
; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool)
; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY5]]
; CHECK: RET 0, $xmm0
- %call = tail call ninf float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf afn float @llvm.sqrt.f32(float %f)
ret float %call
}
; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0
; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF
; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]]
- ; CHECK: %3:fr32 = ninf nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
+ ; CHECK: %3:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool)
- ; CHECK: %5:fr32 = ninf nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %5:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr
; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool)
- ; CHECK: %7:fr32 = ninf nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %8:fr32 = ninf nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
- ; CHECK: %9:fr32 = ninf nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
- ; CHECK: %10:fr32 = ninf nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
- ; CHECK: %11:fr32 = ninf nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
- ; CHECK: %12:fr32 = ninf nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
+ ; CHECK: %7:fr32 = ninf afn nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %8:fr32 = ninf afn nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr
+ ; CHECK: %9:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr
+ ; CHECK: %10:fr32 = ninf afn nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr
+ ; CHECK: %11:fr32 = ninf afn nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr
+ ; CHECK: %12:fr32 = ninf afn nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr
; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12
; CHECK: [[FsFLD0SS:%[0-9]+]]:fr32 = FsFLD0SS
; CHECK: %15:fr32 = nofpexcept VCMPSSrr [[COPY]], killed [[FsFLD0SS]], 0, implicit $mxcsr
; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]]
; CHECK: $xmm0 = COPY [[COPY3]]
; CHECK: RET 0, $xmm0
- %call = tail call ninf float @llvm.sqrt.f32(float %f)
+ %call = tail call ninf afn float @llvm.sqrt.f32(float %f)
ret float %call
}
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf float @__sqrtf_finite(float %f) #2
+ %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
ret float %call
}
; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf float @__sqrtf_finite(float %f) #2
+ %call = tail call ninf afn float @__sqrtf_finite(float %f) #2
ret float %call
}
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf float @__sqrtf_finite(float %x) #2
+ %call = tail call ninf afn float @__sqrtf_finite(float %x) #2
ret float %call
}
; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %call = tail call ninf <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
+ %call = tail call ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) #2
ret <4 x float> %call
}