[X86][FP16] Only generate approximate rsqrt when Reciprocal is true for half type

author Phoebe Wang <phoebe.wang@intel.com>

Thu, 2 Dec 2021 05:11:07 +0000 (13:11 +0800)

committer Phoebe Wang <phoebe.wang@intel.com>

Thu, 2 Dec 2021 05:52:45 +0000 (13:52 +0800)
author Phoebe Wang <phoebe.wang@intel.com>
Thu, 2 Dec 2021 05:11:07 +0000 (13:11 +0800)
committer Phoebe Wang <phoebe.wang@intel.com>
Thu, 2 Dec 2021 05:52:45 +0000 (13:52 +0800)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index b82a74b..62b2387 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23190,6 +23190,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
  bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
  
+  // We don't need to replace SQRT with RSQRT for half type.
+  if (VT.getScalarType() == MVT::f16)
+    return true;
+
    // We never want to use both SQRT and RSQRT instructions for the same input.
    if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
      return false;
@@ -23236,6 +23240,7 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
  
    if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
        Subtarget.hasFP16()) {
+    assert(Reciprocal && "Don't replace SQRT with RSQRT for half type");
      if (RefinementSteps == ReciprocalEstimate::Unspecified)
        RefinementSteps = 0;
  
diff --git a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll

index 34f1391..6a5c3e2 100644 (file)
--- a/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll
@@ -123,12 +123,7 @@ define half @test_sqrt_sh2(half %a0, half %a1) {
  define half @test_sqrt_sh3(half %a0, half %a1) {
  ; CHECK-LABEL: test_sqrt_sh3:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm1
-; CHECK-NEXT:    vcmpltsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
-; CHECK-NEXT:    vrsqrtsh %xmm0, %xmm0, %xmm0
-; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; CHECK-NEXT:    vmovsh %xmm1, %xmm0, %xmm0 {%k1}
+; CHECK-NEXT:    vsqrtsh %xmm0, %xmm0, %xmm0
  ; CHECK-NEXT:    retq
    %1 = call fast half @llvm.sqrt.f16(half %a0)
    ret half %1
diff --git a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll

index d3af80c..b103b23 100644 (file)
--- a/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll
@@ -972,10 +972,7 @@ define <8 x half> @test_sqrt_ph_128_fast(<8 x half> %a0, <8 x half> %a1) {
  define <8 x half> @test_sqrt_ph_128_fast2(<8 x half> %a0, <8 x half> %a1) {
  ; CHECK-LABEL: test_sqrt_ph_128_fast2:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm1
-; CHECK-NEXT:    vcmpgeph {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %xmm1, %k1
-; CHECK-NEXT:    vrsqrtph %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vsqrtph %xmm0, %xmm0
  ; CHECK-NEXT:    retq
    %1 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %a0)
    ret <8 x half> %1
author	Phoebe Wang <phoebe.wang@intel.com>
	Thu, 2 Dec 2021 05:11:07 +0000 (13:11 +0800)
committer	Phoebe Wang <phoebe.wang@intel.com>
	Thu, 2 Dec 2021 05:52:45 +0000 (13:52 +0800)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/avx512fp16-intrinsics.ll		patch | blob | history
llvm/test/CodeGen/X86/avx512fp16vl-intrinsics.ll		patch | blob | history